# Source code for logilab.database.ftiquery
# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of logilab-database.
#
# logilab-database is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 2.1 of the License, or (at your
# option) any later version.
#
# logilab-database is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
# for more details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-database. If not, see <http://www.gnu.org/licenses/>.
"""Query objects for Generic Indexer.
"""
__docformat__ = "restructuredtext en"
from logilab.database.fti import StopWord, tokenize
class Query:
    """A query is the object manipulated by the indexer.

    The query parser calls `add_word` and `add_phrase` on this object
    according to the query string (see query.g for the query string's
    grammar); `execute` then runs the accumulated sub-queries.
    """

    def __init__(self, normalize):
        """Create an empty query.

        :param normalize: callable applied to every word before it is
          stored; it returns the normalized word, or raises `StopWord`
          for a word that must be ignored
        """
        self.normalize = normalize
        # used as a set: keys are the normalized words, values are always 1
        self.words = {}
        self.phrases = []

    def add_word(self, word):
        """Add a single word query (stop words are silently dropped)."""
        try:
            word = self.normalize(word)
        except StopWord:
            return
        # all single word queries will end up in a single KeywordsQuery,
        # so delay instantiation and remove duplicate words
        self.words[word] = 1

    def add_phrase(self, phrase):
        """Add a single phrase query.

        The phrase is tokenized and each token normalized; stop words are
        dropped. A phrase left with no token at all is not registered.
        """
        tokens = []
        for word in tokenize(phrase):
            try:
                tokens.append(self.normalize(word))
            except StopWord:
                continue
        # a phrase made only of stop words has nothing to search for
        if tokens:
            self.phrases.append(PhraseQuery(tokens))

    def execute(self, cursor):
        """Execute this query using the given cursor.

        This is a generator yielding 2-uples (rating, uid) for every uid
        matching all the words and all the phrases of the query. It yields
        nothing as soon as one of the sub-queries has no match.

        An AssertionError is raised on first iteration if neither a word
        nor a phrase has been added.
        """
        assert self.words or self.phrases
        # keywords query
        if self.words:
            results = KeywordsQuery(sorted(self.words)).dict_query(cursor)
            if not results:
                # plain return: raising StopIteration inside a generator is
                # turned into a RuntimeError by PEP 479
                return
        else:
            results = {}
        # phrase queries, each one restricted to the uids found so far
        for phrase_query in self.phrases:
            restrict = results.keys() if results else None
            phrase_results = phrase_query.dict_query(cursor, restrict)
            if not phrase_results:
                # every sub-query must match: nothing can be returned
                return
            # adjust rating of the uids which survived this phrase
            for uid, rating in results.items():
                if uid in phrase_results:
                    phrase_results[uid] += rating
            results = phrase_results
        for uid, rating in results.items():
            yield (rating, uid)
class KeywordsQuery:
    """A keywords query looks for uids matching all those words, in any
    order.
    """

    def __init__(self, words):
        """:param words: sequence of (already normalized) words to match"""
        self.words = words

    def dict_query(self, cursor, uids=None):
        """Execute this query using the given cursor.

        :param cursor: database cursor; the statement uses named
          ``%(name)s`` placeholders
        :param uids: optional iterable of uids the search is restricted
          to; None means no restriction
        :return: a dict with uid as keys and rating as value (empty when
          there is no word to look for or when `uids` is empty)
        """
        if not self.words:
            # without any word the statement below would not be valid SQL
            return {}
        attrs = {}
        tables, select = [], []
        # one (appears, word) pair of aliases per word, all joined on the uid
        for i, word in enumerate(self.words):
            tables.append("appears as appears%d, word as word%d" % (i, i))
            select.append("word%d.word = %%(word%d)s " % (i, i))
            select.append("word%d.word_id = appears%d.word_id " % (i, i))
            attrs["word%d" % i] = word
            if i > 0:
                select.append("appears%d.uid = appears%d.uid " % (i - 1, i))
        if uids is not None:
            uids = list(uids)
            if not uids:
                # restricted to nothing: no uid can match
                return {}
            placeholders = []
            for i, uid in enumerate(uids):
                attrs["uid%d" % i] = uid
                placeholders.append("%%(uid%d)s" % i)
            select.append("appears0.uid IN (%s) " % ", ".join(placeholders))
        query = (
            "SELECT count(*) as rating, appears0.uid FROM "
            + ", ".join(tables)
            + " WHERE "
            + " AND ".join(select)
            + " GROUP BY appears0.uid ;"
        )
        cursor.execute(query, attrs)
        return {uid: rating for rating, uid in cursor.fetchall()}
class PhraseQuery:
    """A phrase query looks for uids matching all the phrase's tokens, in
    the same order and at consecutive positions.
    """

    def __init__(self, tokens):
        """:param tokens: sequence of (already normalized) phrase tokens"""
        self.tokens = tokens

    def dict_query(self, cursor, uids=None):
        """Execute this query using the given cursor.

        :param cursor: database cursor; the statements use named
          ``%(name)s`` placeholders
        :param uids: optional iterable of uids the search is restricted
          to; None means no restriction
        :return: a dict with uid as keys and rating as value, the rating
          being the number of occurrences of the phrase for that uid
          (empty when there is no token or when `uids` is empty)
        """
        results = {}
        if not self.tokens:
            return results
        # every value is bound as a parameter, never pasted into the SQL
        # text, so a token holding a quote cannot break the statement
        params = {"word": self.tokens[0]}
        restrict = ""
        if uids is not None:
            uids = list(uids)
            if not uids:
                # restricted to nothing: no uid can match
                return results
            placeholders = []
            for i, uid in enumerate(uids):
                params["uid%d" % i] = uid
                placeholders.append("%%(uid%d)s" % i)
            restrict = "AND uid in (%s)" % ", ".join(placeholders)
        query = (
            "SELECT uid, pos FROM appears,word "
            "WHERE word.word = %(word)s "
            "AND word.word_id = appears.word_id " + restrict
        )
        cursor.execute(query, params)
        # each occurrence of the first token is a candidate start of phrase
        for uid, pos in cursor.fetchall():
            for offset, token in enumerate(self.tokens[1:], 1):
                cursor.execute(
                    "SELECT appears.uid "
                    "FROM appears,word "
                    "WHERE word.word = %(word)s "
                    "AND appears.pos = %(pos)s "
                    "AND appears.uid = %(uid)s "
                    "AND word.word_id = appears.word_id ;",
                    {"word": token, "uid": uid, "pos": pos + offset},
                )
                if not cursor.fetchall():
                    # expected token missing at that position
                    break
            else:
                # no break: all the following tokens were found in sequence
                results[uid] = results.get(uid, 0) + 1
        return results