;; component-find.lisp ;; ;; Sarah Tierney ;; updated 11 Aug 2004 to use WordNet 2.0 (Geoffrey King) ;; Last updated Aug 31st [dgt] (unless (find-package :km) (make-package :km))(in-package :km)(setq *using-km-package* t) ;;(compile-file "/projects/rkf/util/wordnet.lisp") (load "/projects/rkf/util/wordnet3.lisp") ;;(load "/usr/spool/net/www/users/mfkb/RKF/smedict/lexical-search.lisp") (wn:init) (require :regexp2) ;; ===================================================================================== ;; wrapper for component-find that returns a sorted list of similar concepts in the component-library ;; TODO: ;; - return WN synset along with components ;; word = string, index-file correponds to the clib index file ;; returns lexically and semantically similar concepts ;; output format: ( (component depth-in-hierarchy part-of-speech distance)* ) ;; distance is 0 for exact lexical match, 0.5 for approx lexical or distance in WN (defun component-find-integrated (word index-file &optional (index-var nil)) ;;(format *aura-trace-sem-search* "~% Semantic Search input => word: ~a index-file: ~a" word index-file) (if index-var (setf *component-index* index-var) (load index-file)) (let ((result (reverse (remove-duplicates (sort (append (lexical-search word) ;; give preference to lexical matches for keyword (mapcar #'(lambda (entry) (list (first entry) (second entry) (third entry) (+ 1 (fourth entry)))) (component-find word))) #'index-entry->=) :key #'car :test #'string=)))) ;;(mapcar #'(lambda (r) ;;(format *aura-trace-sem-search* "~% Semantic Search output => component: ~a depth: ~a part-of-speech: ~a distance: ~a" (first entry) (second entry) (third entry) (fourth entry)))) result)) ;; just the semantic search (defun component-find-sorted (word index-file) (load index-file) (reverse (remove-duplicates (sort (component-find word) #'index-entry->=) :key #'car :test #'string=))) ;; ;; component-find ;; ;; Inputs: a word, and an optional part of speech {NOUN, VERB, ADJ, ADV} ;; Output: a list of lists of matching synonyms, level in component-library, ;; parts of speech labels, and level found in searching label, i.e.: ;; (( )...) ;; (defun component-find (word &key (pos 'all)) (when (and (stringp word) (<= (length word) 25)) (if (eq pos 'all) (remove-duplicates (nconc (component-find-with-part-speech word "verb") (component-find-with-part-speech word "noun") (component-find-with-part-speech word "adv") (component-find-with-part-speech word "adj")) :test #'equal) (remove-duplicates (component-find-with-part-speech word (string-downcase (symbol-name pos))) :test #'equal)))) ;; function gets word and part of speech from caller and searches for ;; matches under that part of speech. returns a list of lists of ;; matching synonyms, level in component library, parts of speech ;; labels, and level found in searching ;; ie : (( )...) (defun component-find-with-part-speech (word part-speech) (nconc (refine-component-check (component-check word part-speech)) (if (string= part-speech "adj") (hypernym-check1 (wn:hypernym-offsets word part-speech)) (hypernym-check (wn:hypernym-offsets word part-speech))))) ;; refine-component-check returns an empty list if there are no ;; component matches and returns a list of lists of matching ;; components, level in component library, ptspeech, and level in ;; search--- can only match 1 component to word given (defun refine-component-check (lst) (remove-duplicates (mapcar #'(lambda (str) (append str (list 0))) lst) :test #'equal)) ;; component-check returns list of lists of matching components and ;; level in component library (defun component-check (word pos) (let ((matches)) (dolist (obj *component-index*) (when (and (keyword= (string-downcase (first (car obj))) (string-downcase word)) (string= (third (car obj)) pos)) (push (append (cdr obj) (get-part-speech obj)) matches))) matches)) ;; gets the part of speech that corresponds to the matching offset (defun get-part-speech (obj) (list (third (first obj)))) ;; hypernym-check returns the concatenated list of all matching ;; components of all levels of the hypernym tree for each synonym of ;; the given word (defun hypernym-check (hypernym-list) (if (null hypernym-list) nil (concatenate 'list (looptree-offset (car hypernym-list)) (hypernym-check (cdr hypernym-list))))) ;; adds attribute lists to each synset (defun hypernym-check1 (hypernym-list) (if (null hypernym-list) nil (concatenate 'list (looptree-offset (s hypernym-list "a")) (hypernym-check1 (cdr hypernym-list))))) (defun s (list pos) (mapcan #'(lambda (x) (wn:attribute-list (car x) "a")) list)) ;; looptree-offset finds the matches in the component library for the words in the synsets of ;; the hypernym tree (defun looptree-offset (offset-list) (let (( lev 1) (matches4 '() )) (dolist (obj offset-list) (if (null(looptree2-offset obj) ) '() (setf matches4 (concatenate 'list (append-level (looptree2-offset obj) lev) matches4) )) (setf lev (+ lev 1))) matches4)) ;; appendlev adds the level of the word found in the hypernym tree to the output list (defun append-level (lst level) (mapcar #'(lambda (str2) (concatenate 'list str2 (list level))) lst)) ;; looptree2-offset searches through the words who have synonyms and offsets associated with the word ;; and searches for matches in the component library (defun looptree2-offset (newp) (let (( matches3 '() )) (dolist (obj *component-index*) (cond ((equal (fourth (first obj)) newp) (setf matches3 (cons (append (list (nth 1 obj) (nth 2 obj)) (get-part-speech obj)) matches3))))) matches3)) ;; ================================================================================================================== ;; ;; LEXICAL SEARCH ;; ;; returns a list of components that match keyword ;; result = (km-path spec-path dist depth) ;; for lexical search dist = 0 for exact match, maxint for approx (defun lexical-search (keyword) (let ((results)) (dolist (entry *component-index*) ;; entry = ((wordnet-keyword sense# pos synset#) comp-keyword depth km-path spec-path) (multiple-value-bind (matched whole-match comp) (excl::match-regexp (concatenate 'string ".*\\(" (excl::replace-re (string-downcase keyword) "[_-]" " ") "\\).*") (canonical-entry (index-entry-comp-name entry)) :case-fold t) (if matched (push (list (index-entry-comp-name entry) (index-entry-depth entry) "-" (if (keyword= whole-match comp) 0 0.5)) results)))) results)) ;; ================================================================================================================== ;; ================================================================================================================== ;; ;; INDEX ENTRY FUNCTIONS ;; ;; index-entry structure: ;; ((keywork WN-sense# pos WN-synset) Component depth-in-hierarchy) (defun index-entry-comp-name (index-entry) (second index-entry)) (defun index-entry-depth (index-entry) (third index-entry)) (defun index-entry-km-path (index-entry) (fourth index-entry)) (defun index-entry-spec-path (index-entry) (fifth index-entry)) (defun make-index-entry (&key wordnet-keyword sense# pos synset# comp-keyword depth km-path spec-path) (list (list wordnet-keyword sense# pos synset#) comp-keyword depth km-path spec-path)) ;; replaces _- with a single space " " (defun canonical-entry (entry-name) (excl::replace-regexp (string-left-trim "*" (string-downcase entry-name)) "[_-]" " ")) ;; ;; less-than predicate for entries returned by component-find-aux. ;; Entry (Comp-name level pos dist) is sorted ;; - ascending on dist, ;; - descending on level ;; - ascending on Comp-name (defun index-entry-< (x y) (or (< (nth 3 x) (nth 3 y)) (and (= (nth 3 x) (nth 3 y)) (> (nth 1 x) (nth 1 y))) (and (= (nth 3 x) (nth 3 y)) (= (nth 1 x) (nth 1 y)) (string< (nth 0 x) (nth 0 y))))) ;; greater or equal for entries returned by component-find-aux (defun index-entry->= (x y) (index-entry-< y x)) ;; ============================================================================================================ ;; keyword equality testing ;; solves inconsistency problem between index file (sep=\\s) and WN (sep=_) (defun keyword= (k1 k2) (string= (excl::replace-re (string-downcase k1) "[\\s-]" "_") (excl::replace-re (string-downcase k2) "[\\s-]" "_")))