# Case-insensitive "grep" over whole files.
#
# Usage: script PATTERN FILE...
#   PATTERN  a Perl regular expression (deliberately not quotemeta'd, so
#            callers may pass real regex syntax)
#   FILE...  files to scan; each file is tested as a single string
#
# Prints "<file> contains <pattern>" for every file that matches.
my $search = shift;
die "usage: $0 PATTERN FILE...\n"
    unless defined $search && length $search;

# Compile once instead of re-interpolating the pattern for every file.
my $pattern = qr/$search/i;

# Slurp mode, localized so the global record separator is not left
# clobbered: each read from <> now returns one complete file.
local $/;

while ( my $content = <> ) {
    next unless $content =~ $pattern;
    print "$ARGV contains $search\n";
}
use Plucene::Simple;

# Index every file named on the command line into a Plucene::Simple index.
# The document id is the run of digits that precedes ".txt" in the file name.
my $dir   = "./data/plucene.simple.full";
my $index = Plucene::Simple->open($dir);

# Slurp mode: one read from <> returns one whole file.
# ("local undef $/" is not valid Perl; "local $/" already sets it to undef.)
local $/;

while ( my $text = <> ) {

    # The dot is escaped so that only a literal ".txt" suffix matches.
    my ($id) = $ARGV =~ /(\d+)\.txt/;
    unless ( defined $id ) {
        warn "skipping $ARGV: no numeric id in file name\n";
        next;
    }
    $index->index_document( $id => $text );
}

$index->optimize;
# Search the Plucene::Simple index with the command-line arguments as the
# query, then print a Text::Context excerpt for every matching document.
# NOTE(review): read_file and Text::Context are not loaded in this snippet;
# they are assumed to be imported elsewhere in the script.
my $query = join( ' ', @ARGV );

# length() rather than plain truth, so that a query of "0" is still searched.
if ( length $query ) {
    my $index = Plucene::Simple->open("./data/plucene.simple.full");
    my @ids   = $index->search($query);

    # The keywords do not change per hit, so split them once. split ' '
    # also drops the empty leading field that /\s+/ would produce when the
    # query starts with whitespace.
    my @keywords = split ' ', $query;

    foreach my $id (@ids) {
        my $text  = read_file("text/$id.txt");
        my $match = Text::Context->new( $text, @keywords );
        print $match->as_text, "\n";
    }
}
# Index every file named on the command line with the lower-level Plucene
# API. Each document gets a Keyword field "id" (the digits before ".txt" in
# the file name) and an UnStored field "text" (the file content).
#
# The trailing "# cheat" comment must sit on its own line: when it shares a
# line with the code that follows, it comments that code out.
my $indexer = Plucene::Index::Writer->new(
    "./data/plucene.full",
    Plucene::Analysis::SimpleAnalyzer->new(),    # cheat
    1,    # presumably the "create" flag -- confirm against Plucene::Index::Writer
);

# Slurp mode: one read from <> returns one whole file.
# ("local undef $/" is not valid Perl; "local $/" already sets it to undef.)
local $/;

while ( my $text = <> ) {

    # The dot is escaped so that only a literal ".txt" suffix matches.
    my ($id) = $ARGV =~ /(\d+)\.txt/;
    unless ( defined $id ) {
        warn "skipping $ARGV: no numeric id in file name\n";
        next;
    }

    my $doc = Plucene::Document->new();
    $doc->add( Plucene::Document::Field->Keyword( id => $id ) );
    $doc->add( Plucene::Document::Field->UnStored( text => $text ) );
    $indexer->add_document($doc);
}

$indexer->optimize;
# Run $querystring against the Plucene index and collect every matching
# document object into @docs.
# NOTE(review): $querystring is not defined in this snippet; it is assumed
# to be set by the surrounding script.
my $searcher = Plucene::Search::IndexSearcher->new("./data/plucene.full");

# Use the same analyzer class the index was written with
# (Plucene::Analysis::SimpleAnalyzer), so that query terms are tokenized the
# same way as the indexed text.
my $parser = Plucene::QueryParser->new(
    {
        analyzer => Plucene::Analysis::SimpleAnalyzer->new(),
        default  => "text",
    }
);
my $parsedq = $parser->parse($querystring);

my @docs;

# The collector callback is invoked with ($self, $doc, $score); $doc is
# handed to $searcher->doc() and the result is kept. The score is unused.
my $hc = Plucene::Search::HitCollector->new(
    collect => sub {
        my ( $self, $doc, $score ) = @_;
        push @docs, $searcher->doc($doc);
    },
);

$searcher->search_hc( $parsedq, $hc );
# Turn the collected Plucene documents in @docs into their "id" field
# values, then print a Text::Context excerpt for each one.
# NOTE(review): $query, read_file and Text::Context are not defined in this
# snippet; they are assumed to come from the surrounding script.
my @results = map { $_->get("id")->string } @docs;

# The keywords do not change per hit, so split them once. split ' ' also
# drops the empty leading field that /\s+/ would produce when the query
# starts with whitespace.
my @keywords = split ' ', $query;

foreach my $id (@results) {
    my $text  = read_file("text/$id.txt");
    my $match = Text::Context->new( $text, @keywords );
    print $match->as_text, "\n";
}
use Search::Indexer;

# Index every file named on the command line with Search::Indexer.
# The document id is the run of digits that precedes ".txt" in the file name.
my $indexer = Search::Indexer->new(
    dir       => './data/SearchIndexer',
    writeMode => 1,
);

# Slurp mode: without it <> yields one line at a time, and every line of a
# file would be added under the same document id.
local $/;

while ( my $text = <> ) {

    # The dot is escaped so that only a literal ".txt" suffix matches.
    my ($id) = $ARGV =~ /(\d+)\.txt/;
    unless ( defined $id ) {
        warn "skipping $ARGV: no numeric id in file name\n";
        next;
    }
    $indexer->add( $id, $text );
}
# Search the Search::Indexer index with the command-line arguments as the
# query, then print each hit's id, score and excerpts.
# NOTE(review): writeMode => 1 is kept from the original, but this script
# only searches -- confirm whether write mode is actually needed here.
# NOTE(review): read_file is not loaded in this snippet; it is assumed to be
# imported elsewhere in the script.
my $indexer = Search::Indexer->new(
    dir       => './data/SearchIndexer',
    writeMode => 1,
);

my $result = $indexer->search( join( ' ', @ARGV ) );
my $scores = $result->{scores};

# Hash key order is unpredictable, so rank the hits explicitly: highest
# score first, ties broken by ascending id.
my @ids = sort { $scores->{$b} <=> $scores->{$a} || $a <=> $b } keys %{$scores};

my $killedWords = join ", ", @{ $result->{killedWords} };

print scalar(@ids), " documents found\n";
print "words $killedWords were ignored during the search\n" if $killedWords;

foreach my $id (@ids) {
    my $text     = read_file("text/$id.txt");
    my $score    = $scores->{$id};
    my $excerpts = join "\n", @{ $indexer->excerpts( $text, $result->{regex} ) };
    print "$id, score $score:\n$excerpts\n\n";
}
# Prepare a KinoSearch InvIndexer on ./data/kino with an English
# PolyAnalyzer, and declare the two fields: "text" and "id".
my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );

# Constructor arguments are kept in a list so they are passed in a fixed order.
my @invindexer_args = (
    analyzer => $analyzer,
    invindex => './data/kino',
    create   => 1,
);
my $invindexer = KinoSearch::InvIndexer->new(@invindexer_args);

# Each entry is: field name, followed by that field's single option pair.
my @field_specs = (
    [ text => ( vectorized => 1 ) ],
    [ id   => ( analyzed   => 0 ) ],
);

for my $field_spec (@field_specs) {
    my ( $field_name, @field_options ) = @{$field_spec};
    $invindexer->spec_field( name => $field_name, @field_options );
}
# Add every file named on the command line to $invindexer as one document
# with two values: "text" (the file content) and "id" (the digits before
# ".txt" in the file name). Then finish the indexing session.

# Slurp mode: without it <> yields one line at a time, and every line would
# become its own document.
local $/;

while ( my $text = <> ) {

    # The dot is escaped so that only a literal ".txt" suffix matches.
    my ($id) = $ARGV =~ /(\d+)\.txt/;
    unless ( defined $id ) {
        warn "skipping $ARGV: no numeric id in file name\n";
        next;
    }

    my $doc = $invindexer->new_doc;
    $doc->set_value( text => $text );
    $doc->set_value( id   => $id );
    $invindexer->add_doc($doc);
}

$invindexer->finish;
# Search the KinoSearch index in ./data/kino with the command-line arguments
# as the query, then print each hit's id, score and a highlighted excerpt
# taken from the "text" field.
my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );

my $searcher = KinoSearch::Searcher->new(
    invindex => './data/kino',
    analyzer => $analyzer,
);

my $hits = $searcher->search( join( ' ', @ARGV ) );

my $highlighter =
    KinoSearch::Highlight::Highlighter->new( excerpt_field => 'text' );
$hits->create_excerpts( highlighter => $highlighter );

while ( my $hit = $hits->fetch_hit_hashref ) {

    # End each record with a newline so that the next hit's "id: score"
    # header does not run straight on from this excerpt.
    print "$hit->{id}: $hit->{score}\n$hit->{excerpt}\n";
}
use Search::Xapian ':db';

# Index every file named on the command line into the Xapian database at
# ./data/xapian. Each file becomes one document whose data is the id (the
# digits before ".txt" in the file name) and whose terms are the stemmed
# words of at least four characters.
my $db = Search::Xapian::WritableDatabase->new( './data/xapian',
    DB_CREATE_OR_OPEN );
my $stemmer = Search::Xapian::Stem->new('english');

# Slurp mode: without it <> yields one line at a time, and every line would
# become its own Xapian document.
local $/;

while ( my $text = <> ) {

    # The dot is escaped so that only a literal ".txt" suffix matches.
    my ($id) = $ARGV =~ /(\d+)\.txt/;
    unless ( defined $id ) {
        warn "skipping $ARGV: no numeric id in file name\n";
        next;
    }

    my $doc = Search::Xapian::Document->new;
    $doc->set_data($id);

    # split ' ' splits on runs of whitespace and ignores leading whitespace.
    foreach my $word ( split ' ', $text ) {
        next if length($word) < 4;
        $doc->add_term( $stemmer->stem_word($word) );
    }

    $db->add_document($doc);
}
use Search::Xapian qw(:ops);

# OR-search the Xapian database at ./data/xapian for the command-line
# arguments, then print the first 12 matches: a "docid score N%" header line
# followed by the text of the matching file.
# NOTE(review): read_file is not loaded in this snippet; it is assumed to be
# imported elsewhere in the script.
my $db = Search::Xapian::Database->new('./data/xapian');

# if stemming, or parsing text, see QueryParser
# (This comment must sit on its own line: when it shares a line with the
# code that follows, it comments that code out.)
my $enq = $db->enquire( OP_OR, @ARGV );

foreach my $match ( $enq->matches( 0, 12 ) ) {

    # End the header with a newline so that the document text starts on its
    # own line.
    printf "%d score %d%%\n", $match->get_docid(), $match->get_percent();

    my $doc = $match->get_document();
    print read_file( sprintf( "text/%i.txt", $doc->get_data() ) );
}