| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2
3 # -*- python -*-
4
5 # pylogsparser - Logs parsers python library
6 #
7 # Copyright (C) 2011 Wallix Inc.
8 #
9 # This library is free software; you can redistribute it and/or modify it
10 # under the terms of the GNU Lesser General Public License as published by the
11 # Free Software Foundation; either version 2.1 of the License, or (at your
12 # option) any later version.
13 #
14 # This library is distributed in the hope that it will be useful, but WITHOUT
15 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
17 # details.
18 #
19 # You should have received a copy of the GNU Lesser General Public License
20 # along with this library; if not, write to the Free Software Foundation, Inc.,
21 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #
23
24 """In this module we define a regular expression used to fetch the most common
25 robots."""
26
27 import re
28
# taken from genrobotlist.pl in the awstats project : http://awstats.cvs.sourceforge.net
# Literal robot names. Order matters: the names are joined into a single
# alternation below, and the regex engine tries the alternatives left to right.
robots = [
    'antibot',
    'appie',
    'architext',
    'bingbot',
    'bjaaland',
    'digout4u',
    'echo',
    'fast-webcrawler',
    'ferret',
    'googlebot',
    'gulliver',
    'harvest',
    'htdig',
    'ia_archiver',
    'askjeeves',
    'jennybot',
    'linkwalker',
    'lycos',
    'mercator',
    'moget',
    'muscatferret',
    'myweb',
    'netcraft',
    'nomad',
    'petersnews',
    'scooter',
    'slurp',
    'unlost_web_crawler',
    'voila',
    'voyager',
    'webbase',
    'weblayers',
    'wisenutbot',
    'aport',
    'awbot',
    'baiduspider',
    'bobby',
    'boris',
    'bumblebee',
    'cscrawler',
    'daviesbot',
    'exactseek',
    'ezresult',
    'gigabot',
    'gnodspider',
    'grub',
    'henrythemiragorobot',
    'holmes',
    'internetseer',
    'justview',
    'linkbot',
    'metager-linkchecker',
    'linkchecker',
    'microsoft_url_control',
    'msiecrawler',
    'nagios',
    'perman',
    'pompos',
    'rambler',
    'redalert',
    'shoutcast',
    'slysearch',
    'surveybot',
    'turnitinbot',
    'turtlescanner',
    'turtle',
    'ultraseek',
    'webclipping.com',
    'webcompass',
    'yahoo-verticalcrawler',
    'yandex',
    'zealbot',
    'zyborg',
]

# Case-insensitive pattern matching any of the names above as a substring.
# The entries are plain names, not patterns, so each one is escaped: without
# this the '.' in 'webclipping.com' would match any character.
robot_regex = re.compile(
    "|".join(re.escape(name) for name in robots),
    re.IGNORECASE,
)
106
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sun Apr 1 08:26:26 2012 | http://epydoc.sourceforge.net |