• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KHTML

khtml_filter.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE project
00002 
00003    Copyright (C) 2005 Ivor Hewitt     <ivor@kde.org>
00004    Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
00005    Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Library General Public
00009    License as published by the Free Software Foundation; either
00010    version 2 of the License, or (at your option) any later version.
00011 
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Library General Public License for more details.
00016 
00017    You should have received a copy of the GNU Library General Public License
00018    along with this library; see the file COPYING.LIB.  If not, write to
00019    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020    Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "khtml_filter_p.h"
00024 #include <QDebug>
00025 
00026 // rolling hash parameters
00027 #define HASH_P (1997)
00028 #define HASH_Q (17509)
00029 // HASH_MOD = (HASH_P^7) % HASH_Q
00030 #define HASH_MOD (523)
00031 
00032 namespace khtml {
00033 
00034 void FilterSet::addFilter(const QString& filterStr)
00035 {
00036     QString filter = filterStr;
00037     
00038     if (filter.startsWith(QLatin1Char('!')))
00039         return;
00040 
00041     // Strip leading @@
00042     int first = 0;
00043     int last  = filter.length() - 1;
00044     if (filter.startsWith(QLatin1String("@@")))
00045         first = 2;
00046         
00047     // Strip options, we ignore them for now.
00048     int dollar = filter.lastIndexOf(QLatin1Char('$'));
00049     if (dollar != -1)
00050         last = dollar - 1;
00051     
00052     // Perhaps nothing left?
00053     if (first > last)
00054         return;
00055         
00056     filter = filter.mid(first, last - first + 1);
00057     
00058     // Is it a regexp filter?
00059     if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/')))
00060     {
00061         QString inside = filter.mid(1, filter.length()-2);
00062         QRegExp rx(inside);
00063         reFilters.append(rx);
00064 //         qDebug() << "R:" << inside;
00065     }
00066     else
00067     {
00068         // Nope, a wildcard one.
00069         // Note: For these, we also need to handle |.
00070         
00071         // Strip wildcards at the ends
00072         first = 0;
00073         last  = filter.length() - 1;
00074         
00075         while (first < filter.length() && filter[first] == QLatin1Char('*'))
00076             ++first;
00077             
00078         while (last >= 0 && filter[last] == QLatin1Char('*'))
00079             --last;
00080             
00081         if (first > last)
00082             filter = QLatin1String("*"); // erm... Well, they asked for it.
00083         else
00084             filter = filter.mid(first, last - first + 1);
00085             
00086         // Now, do we still have any wildcard stuff left?
00087         if (filter.contains("*") || filter.contains("?")) 
00088         {
00089 //             qDebug() << "W:" << filter;
00090             // check if we can use RK first (and then check full RE for the rest) for better performance
00091             int aPos = filter.indexOf('*');
00092             if (aPos < 0)
00093                 aPos = filter.length();
00094             int qPos = filter.indexOf('?');
00095             if (qPos < 0)
00096                 qPos = filter.length();
00097             int pos = qMin(aPos, qPos);
00098             if (pos > 7) {
00099                 QRegExp rx;
00100 
00101                 rx.setPatternSyntax(QRegExp::Wildcard);
00102                 rx.setPattern(filter.mid(pos));
00103 
00104                 stringFiltersMatcher.addWildedString(filter.mid(0, pos), rx);
00105 
00106             } else {
00107                 QRegExp rx;
00108 
00109                 rx.setPatternSyntax(QRegExp::Wildcard);
00110                 rx.setPattern(filter);
00111                 reFilters.append(rx);
00112             }
00113         }
00114         else
00115         {
00116             // Fast path
00117             stringFiltersMatcher.addString(filter);
00118         }
00119     }
00120 }
00121 
00122 bool FilterSet::isUrlMatched(const QString& url)
00123 {
00124     if (stringFiltersMatcher.isMatched(url))
00125         return true;
00126 
00127     for (int c = 0; c < reFilters.size(); ++c)
00128     {
00129         if (url.contains(reFilters[c]))
00130             return true;
00131     }
00132 
00133     return false;
00134 }
00135 
00136 QString FilterSet::urlMatchedBy(const QString& url)
00137 {
00138     QString by;
00139 
00140     if (stringFiltersMatcher.isMatched(url, &by))
00141         return by;
00142 
00143     for (int c = 0; c < reFilters.size(); ++c)
00144     {
00145         if (url.contains(reFilters[c]))
00146         {
00147             by = reFilters[c].pattern();
00148             break;
00149         }
00150     }
00151 
00152     return by;
00153 }
00154 
00155 void FilterSet::clear()
00156 {
00157     reFilters.clear();
00158     stringFiltersMatcher.clear();
00159 }
00160 
00161 
00162 void StringsMatcher::addString(const QString& pattern)
00163 {
00164     if (pattern.length() < 8) {
00165         // handle short string differently
00166         shortStringFilters.append(pattern);
00167     } else {
00168         // use modified Rabin-Karp's algorithm with 8-length string hash
00169         // i.e. store hash of first 8 chars in the HashMap for fast look-up
00170         stringFilters.append(pattern);
00171         int ind = stringFilters.size() - 1;
00172         int current = 0;
00173 
00174         // compute hash using rolling hash
00175         // hash for string: x0,x1,x2...xn-1 will be:
00176         // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
00177         // where p and q some wisely-chosen integers
00178         /*for (int k = 0; k < 8; ++k)*/
00179         int len = pattern.length();
00180         for (int k = len - 8; k < len; ++k)
00181             current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
00182 
00183         // insert computed hash value into HashMap
00184         WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
00185         if (it == stringFiltersHash.end()) {
00186             QVector<int> list;
00187             list.append(ind);
00188             stringFiltersHash.add(current + 1, list);
00189             fastLookUp.setBit(current);
00190         } else {
00191             it->second.append(ind);
00192         }
00193     }
00194 }
00195 
00196 void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
00197 {
00198     rePrefixes.append(prefix);
00199     reFilters.append(rx);
00200     int index = -rePrefixes.size();
00201 
00202     int current = 0;
00203     for (int k = 0; k < 8; ++k)
00204         current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
00205 
00206     // insert computed hash value into HashMap
00207     WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
00208     if (it == stringFiltersHash.end()) {
00209         QVector<int> list;
00210         list.append(index);
00211         stringFiltersHash.add(current + 1, list);
00212         fastLookUp.setBit(current);
00213     } else {
00214         it->second.append(index);
00215     }
00216 }
00217 
00218 bool StringsMatcher::isMatched(const QString& str, QString *by) const
00219 {
00220     // check short strings first
00221     for (int i = 0; i < shortStringFilters.size(); ++i) {
00222         if (str.contains(shortStringFilters[i]))
00223         {
00224             if (by != 0) *by = shortStringFilters[i];
00225             return true;
00226         }
00227     }
00228 
00229     int len = str.length();
00230     int k;
00231 
00232     int current = 0;
00233     int next = 0;
00234     // compute hash for first 8 characters
00235     for (k = 0; k < 8 && k < len; ++k)
00236         current = (current * HASH_P + str[k].unicode()) % HASH_Q;
00237 
00238     WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
00239     // main Rabin-Karp's algorithm loop
00240     for (k = 7; k < len; ++k, current = next) {
00241         // roll the hash if not at the end
00242         // (calculate hash for the next iteration)
00243         if (k + 1 < len)
00244             next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
00245 
00246         if (!fastLookUp.testBit(current))
00247             continue;
00248 
00249         // look-up the hash in the HashMap and check all strings
00250         WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
00251 
00252         // check possible strings
00253         if (it != hashEnd) {
00254             for (int j = 0; j < it->second.size(); ++j) {
00255                 int index = it->second[j];
00256                 // check if we got simple string or REs prefix
00257                 if (index >= 0) {
00258                     int flen = stringFilters[index].length();
00259                     if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
00260                     {
00261                         if (by != 0) *by = stringFilters[index];
00262                         return true;
00263                     }
00264                 } else {
00265                     index = -index - 1;
00266                     int flen = rePrefixes[index].length();
00267                     if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) &&
00268                             str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen)
00269                     {
00270                         if (by != 0) *by = rePrefixes[index]+reFilters[index].pattern();
00271                         return true;
00272                     }
00273                 }
00274             }
00275         }
00276     }
00277 
00278     return false;
00279 }
00280 
00281 void StringsMatcher::clear()
00282 {
00283     stringFilters.clear();
00284     shortStringFilters.clear();
00285     reFilters.clear();
00286     rePrefixes.clear();
00287     stringFiltersHash.clear();
00288     fastLookUp.resize(HASH_Q);
00289     fastLookUp.fill(0, 0, HASH_Q);
00290 }
00291 
00292 }
00293 
00294 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;

KHTML

Skip menu "KHTML"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal