experiments.h

Go to the documentation of this file.
00001 #ifndef TAGCOLL_EXPERIMENTS_H
00002 #define TAGCOLL_EXPERIMENTS_H
00003 
00008 /*
00009  * Copyright (C) 2005  Enrico Zini <enrico@debian.org>
00010  *
00011  * This program is free software; you can redistribute it and/or modify
00012  * it under the terms of the GNU General Public License as published by
00013  * the Free Software Foundation; either version 2 of the License, or
00014  * (at your option) any later version.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU General Public License
00022  * along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00024  */
00025 
00026 #include <tagcoll/CardinalityStore.h>
00027 #include <tagcoll/Expression.h>
00028 
00029 #include <vector>
00030 #include <ostream>
00031 
00032 namespace Tagcoll
00033 {
00034 
00035 template <typename TAG>
00036 class Scores
00037 {
00038 protected:
00039     struct Score
00040     {
00041         Expression expr;
00042         float score;
00043         Score(const std::string& expr, float score) :
00044             expr(expr), score(score) {}
00045     };
00046     float defaultScore;
00047     std::vector<Score> scores;
00048 public:
00049     Scores(float def = 1.0) : defaultScore(def) {}
00050 
00051     void add(const std::string& expr, float score)
00052     {
00053         scores.push_back(Score(expr, score));
00054     }
00055 
00056     float operator()(const TAG& tag) const
00057     {
00058         for (typename std::vector<Score>::const_iterator i = scores.begin();
00059                 i != scores.end(); i++)
00060         {
00061             OpSet<TAG> tags;
00062             tags += tag;
00063             if (i->expr(tags))
00064                 return i->score;
00065         }
00066         return defaultScore;
00067     }
00068 
00069     float distance(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2) const
00070     {
00071         float res = 0;
00072         OpSet<TAG> diff = (ts1 - ts2) + (ts2 - ts1);
00073         for (typename OpSet<TAG>::const_iterator i = diff.begin();
00074                 i != diff.end(); i++)
00075             res += (*this)(*i);
00076         return res;
00077     }
00078 };
00079 
00080 template <typename ITEM, typename TAG>
00081 class Normalizer : public CardinalityStore<ITEM, TAG>
00082 {
00083 protected:
00084     unsigned int max_threshold;
00085     unsigned int merge_threshold;
00086     unsigned int min_threshold;
00087     float maxMergeDist;
00088 
00089     typedef CardinalityStore<ITEM, TAG> tagsets_t;
00090 
00091     typedef std::map< OpSet<TAG>, std::vector< OpSet<TAG> > > distgraph_t;
00092     distgraph_t distGraph;
00093 
00094     bool addToGraph(const Scores<TAG>& scores, const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
00095     bool mergeTagsets(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
00096     void removeAfterMerge(const OpSet<TAG>& ts, const OpSet<TAG>& merged);
00097 
00098 
00099 public:
00100     Normalizer(float mmd = 1.0) :
00101         max_threshold(14),
00102         merge_threshold(7),
00103         min_threshold(2),
00104         maxMergeDist(mmd) {}
00105 
00106     void buildGraph(const Scores<TAG>& scores);
00107 
00108     void normalize();
00109 };
00110 
00111 template <typename ITEM, typename TAG>
00112 class Graph : public CardinalityStore<ITEM, TAG>
00113 {
00114 protected:
00115     typedef CardinalityStore<ITEM, TAG> tagsets_t;
00116     
00117     unsigned int seq;
00118     std::map< OpSet<TAG>, int > handles;
00119 
00120     int getHandle(const OpSet<TAG>& node);
00121     void buildSubGraph(std::ostream& out, const OpSet<TAG>& node, OpSet< OpSet<TAG> >& selected, int maxdist, int maxlev);
00122 
00123 public:
00124     Graph() : seq(0) {}
00125 
00126     void buildGraph(std::ostream& out, const OpSet<TAG>& node, int maxdist = 3, int maxlev = 3);
00127     void buildGraphs(const std::string& dir, int maxdist = 3);
00128 };
00129 
00130 }
00131 
00132 // vim:set ts=4 sw=4:
00133 #endif

Generated on Sat Aug 19 00:46:17 2006 for libtagcoll by doxygen 1.4.7