kzimmermann
/
unixsearch


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
							#!/usr/bin/env python
#
# THE QUERIER
#
# The querier will act as the intermediate between the user interface and the
# Crawler's database. Although it can act as a user interface itself, that is
# not the goal of the querier. Rather, it focuses on being able to present the
# data given by the crawler in the most diverse ways possible.
#
# VERSIONING:
# - A text-based user interface allowing for multiple queries per session, and
#   an option to recrawl the filesystem.
# - A text-based user interface allowing for index searching within plain text
#   files.
# - A module that allows for the querying and preparation of the results to be
#   displayed in another interface module.
#

import os
import sys
import sqlite3 as sql
import crawl

# Session variables:
query = "nothing"
results = "nothing still"
total = 0

# For the sake of modularity, we have to set methods for compatibility:

# searching for name only
def simple_search(token):
    if token == "":
        return
    result_set = crawl.cursor.execute("SELECT path FROM fileindex WHERE name LIKE ?", ('%{}%'.format(token),))
    return result_set

# searching for file contents:
def index_search(token):
    if token == "":
        return
    result_set = crawl.cursor.execute("SELECT path, contents FROM fileindex WHERE contents LIKE ?", ('%{}%'.format(token),))
    return result_set

def refresh():
    google = crawl.Crawler()
    google.crawl('/')
    return True

if __name__ == "__main__":
    # Start of program:
    print "========="
    print "Linux and Unix file locating utility"
    print "========="

    # Querying session:
    while True:
        total = 0
        query = raw_input("\nType the name of the file you're looking for, or '.com' for a list of commands:\n>_ ")
        if query == ".com":
            print """.com - shows a list of commands
.quit - exits the program
.index - searches within files
.refresh - refreshes the database by crawling again"""
            continue
        if query == ".quit":
            break
    
        if query == ".index":
            # Perform index searching and show a snippet of the results:
            query = raw_input("Type a query to search for: ")
            results = index_search(query)
            for result in results:
                print "=========\nIn %s, we found: " % result[0]
    
                # creating 10-word long snippets:
                try:
                    before = result[1].split(query)[0].split(" ")
                    before.reverse() # required for iteration... 
                except IndexError:
                    before = ['']
                try:
                    after = result[1].split(query)[1].split(" ")
                except IndexError:
                    after = ['']
    
                snippet_before = []
                snippet_after = []
    
                word_count = 0
                snippet_size = 5 # word limit to each side 
                while word_count < snippet_size:
                    try:
                        snippet_before.append(before[word_count])
                    except IndexError:
                        # If we are on either "margin" of the text content, we may 
                        # not be able to extract all the required words:
                        pass
                    try:
                        snippet_after.append(after[word_count])
                    except IndexError:
                        pass
                    word_count += 1
     
                    # Now we reverse the before part once again.
                    snippet_before.reverse()
                    snippet = " ".join(snippet_before) + query + \
                              " ".join(snippet_after)
                print snippet + "\n"
                total += 1
            print "======\nFound %d files containing '%s'" % (total, query)
            continue
    
        if query == ".refresh":
            print "Refreshing database... this may take a while."
            google = crawl.Crawler()
            google.crawl('/')
            print "=======\nRefreshing complete. Please try again."
            continue
    
        if query == "":
            continue # this will avoid the huge overhead of listing everything!
    
        # standard file search
        results = simple_search(query)
        for result in results:
            print result[0]
            total += 1
        crawl.conn.commit()
        print "======\n%d files found, named '%s'" % (total, query)
    
    # Finish everything beautifully
    crawl.conn.close()
    print "Bye!"