File Coverage

File:blib/lib/Mediawiki/Blame.pm
Coverage:99.3%

linestmtbrancondsubpodtimecode
1package Mediawiki::Blame;
2# $Revision: 9 $
3# $Date: 2007-08-12 16:36:55 +0200 (So, 12 Aug 2007) $
4
10
10
10
159
28
33
use 5.008;
5
10
10
10
13082
29
63
use utf8;
6
8
8
8
186
18
59
use strict;
7
8
8
8
50
16
60
use warnings;
8
8
8
8
112
24
24
use Algorithm::Annotate qw();
9
8
8
8
70
17
95
use Carp qw(croak);
10
8
8
8
123
25
86
use Class::Spiffy qw(-base field const);
11
8
8
8
123
23
27
use DateTime qw();
12
8
8
8
166
30
32
use DateTime::Format::ISO8601 qw();
13
8
8
8
157
36
49
use LWP::UserAgent qw();
14
8
8
8
134
31
27
use Mediawiki::Blame::Revision qw();
15
8
8
8
121
28
31
use Mediawiki::Blame::Line qw();
16
8
8
8
62
18
102
use Params::Validate qw(validate_with SCALAR);
17
8
8
8
57
17
23
use Perl::Version qw(); our $VERSION = Perl::Version->new('0.0.2')->stringify;
18
8
8
8
131
25
57
use Regexp::Common qw(number URI);
19
8
8
8
123
27
163
use Readonly qw(Readonly);
20
8
8
8
141
27
29
use XML::Twig qw();
21
22field 'export';
23field 'page';
24field 'ua_timeout';
25field '_revisions'; # hashref whose keys are r_ids and values are hashrefs
26field '_initial'; # r_id of the initial revision
27field '_lwp'; # LWP instance
28
29sub new {
30
24
1
2171
    my $class = shift;
31
24
102
    my $self = {};
32
24
433
    bless $self, $class;
33
34    validate_with(
35        params => \@_,
36        on_fail => sub {
37
12
104
            chomp (my $p = shift);
38
12
66
            croak $p;
39        },
40
24
187
        spec => {
41            export => {
42                regex => qr/\A $RE{URI} \z/msx
43            },
44            page => {
45                type => SCALAR,
46            },
47        },
48    );
49
50
12
42
    my %P = @_; # params as hash
51
52
12
206
    $self->export($P{export});
53
12
84
    $self->page($P{page});
54
55    {
56
12
12
34
30
        my $lwp_name;
57
12
4
4
4
4
4
4
38
76
17
16
35
10
10
        eval q{
58            use LWPx::ParanoidAgent qw();
59        };
60
12
203
        if ($@) {
61
0
0
            $lwp_name = 'LWP::UserAgent';
62        } else {
63
12
45
            $lwp_name = 'LWPx::ParanoidAgent';
64        };
65
66
12
126
        $self->_lwp($lwp_name->new);
67
12
48
        $self->_lwp->agent(
68            "Mediawiki::Blame/$VERSION (http://search.cpan.org/dist/Mediawiki-Blame/)"
69        );
70
12
12
405
47
        push @{ $self->_lwp->requests_redirectable }, 'POST';
71    };
72
73
12
614
    $self->ua_timeout(30); # seconds
74
12
76
    $self->_revisions({});
75
76
12
122
    $self->_xml_to_revisions(
77        $self->_post(
78            $self->_post_params({
79                after => 1980, # one revision after 1980, i.e. the initial
80                limit => 1,
81            })
82        )
83    );
84
85
6
76
    $self->_initial(
86        [$self->revisions]->[0]->r_id
87    );
88
89
6
44
    $self->_revisions({}); # reset
90
91
6
54
    return $self;
92};
93
94sub _is_now_or_a_datetime {
95
10
66
    my $p = shift;
96
10
55
    if ($p eq 'now') {
97
2
7
        return 1;
98    };
99
8
31
    _is_a_datetime($p);
100
6
21
    return 1;
101};
102
103sub _is_a_datetime {
104
14
43
    eval {
105
14
143
        DateTime::Format::ISO8601->parse_datetime(shift)
106    };
107
14
487
    if ($@) {
108
4
44
        croak substr $@, 0, (index $@, ' at '); # clean up stacktrace
109    };
110
10
32
    return 1;
111};
112
113sub _is_greater_or_equal_to_2 {
114
10
66
    my $p = shift;
115
10
73
    return ($p =~ /\A $RE{num}{int} \z/msx and $p >= 2);
116};
117
118sub _offset {
119
20
62
    my $self = shift;
120
20
58
    my $P = shift; # hashref
121
122
20
100
    for my $k ('before', 'after') {
123
34
174
        if (exists $P->{$k}) {
124
20
123
            Readonly my $STRF => '%FT%TZ'; # 2007-07-23T21:43:56Z
125
20
181
            if (($k eq 'before') and ($P->{$k} eq 'now')) {
126
2
24
                return DateTime->now->strftime($STRF);
127            };
128
18
223
            return DateTime::Format::ISO8601
129                ->parse_datetime($P->{$k})
130                ->strftime($STRF);
131        };
132    };
133};
134
135sub _post_params {
136
20
62
    my $self = shift;
137
20
60
    my $P = shift; # hashref
138
139
20
95
    my $offset = $self->_offset($P);
140
141
20
5324
    my %post_params = (
142        pages => $self->page,
143        offset => $offset,
144    );
145
146
20
128
    if (exists $P->{before}) {
147
6
31
        $post_params{dir} = 'desc';
148    };
149
150
20
117
    if (exists $P->{limit}) {
151
18
96
        $post_params{limit} = $P->{limit};
152    };
153
154
20
134
    return \%post_params;
155};
156
157sub fetch {
158
20
1
1061
    my $self = shift;
159
160    validate_with(
161        params => \@_,
162        on_fail => sub {
163
4
32
            chomp (my $p = shift);
164
4
21
            croak $p;
165        },
166
20
174
        spec => {
167            before => {
168                optional => 1,
169                callbacks => {
170                    'is now or a datetime' => \&_is_now_or_a_datetime,
171                },
172            },
173            after => {
174                optional => 1,
175                callbacks => {
176                    'is a datetime' => \&_is_a_datetime,
177                },
178            },
179            limit => {
180                optional => 1,
181                callbacks => {
182                    'is greater or equal to 2' => \&_is_greater_or_equal_to_2,
183                },
184            },
185        },
186    );
187
188
12
625
    my %P = @_; # params as hash
189
190
12
151
    if (exists $P{before} and exists $P{after}) {
191
2
12
        croak 'before and after mutually exclusive';
192    };
193
194
10
109
    if (!exists $P{before} and !exists $P{after}) {
195
2
12
        croak 'either before or after needed';
196    };
197
198
8
58
    my ($revision_counter, $revision_duplicates)
199        = $self->_xml_to_revisions(
200            $self->_post(
201                $self->_post_params(\%P)
202            )
203        );
204
205
8
197
    return ($revision_counter, $revision_duplicates);
206};
207
208sub _xml_to_revisions {
209
16
2106
    my $self = shift;
210
16
797
    my $xml = shift;
211
212
16
60
    my $revision_counter = 0;
213
16
44
    my $revision_duplicates = 0;
214
215
16
47
    eval {
216        XML::Twig->new(twig_handlers => {'revision' => sub {
217
198
9793
            my $twig = shift;
218
198
558
            my $elt = shift;
219
220
198
464
            $revision_counter++;
221
222
198
1062
            my $r_id = $elt->first_child_text('id');
223
224
198
1677
            if (exists $self->_revisions->{$r_id}) {
225
40
99
                $revision_duplicates++;
226            } else {
227
158
594
                my $contrib_node = $elt->first_child('contributor');
228
229
158
1023
                my $contributor;
230
158
632
                if ($contrib_node->first_child_text('username')) {
231
84
1096
                    $contributor
232                        = $contrib_node->first_child_text('username');
233                } else {
234
74
1426
                    $contributor
235                        = $contrib_node->first_child_text('ip');
236                };
237
238
158
1013
                $self->_revisions->{$elt->first_child_text('id')} = [
239                    $elt->first_child_text('timestamp'),
240                    $contributor,
241                    [
242                        split /(?<=\n)/, # at line breaks, but don't remove
243                        $elt->first_child_text('text')
244                    ],
245                ];
246            };
247
198
1518
            $twig->purge;
248
16
513
        }})->parse($xml)->purge
249    };
250
251
16
54
    if ($@) {
252# XML::Parser dies, not croaks with some especially dirty error message,
253# so I have to do a good scrubbing
254
2
121
        my $e = $@;
255
2
9
        $e = substr $e, 1; # remove leading "\n"
256
257
2
21
        croak 'XML parsing failed: '
258            . substr $e, 0, ( # clean up stacktrace
259            index $e, ' at ', 1+( # next ' at ' (discard at this position)
260                index $e, ' at ' # first ' at ' (keep it)
261            )
262        );
263    };
264
265
14
902
    return ($revision_counter, $revision_duplicates);
266};
267
268sub _post {
269
20
61
    my $self = shift;
270
20
60
    my $post_params = shift; # hashref
271
272
20
122
    $self->_lwp->timeout($self->ua_timeout);
273
274
20
1362
    my $response = $self->_lwp->post($self->export, $post_params);
275
20
2586
    if (not $response->is_success) {
276
4
124
        croak 'POST request to ' . $self->export . ' failed: '
277          . $response->status_line;
278    };
279
280
16
550
    return $response->decoded_content;
281};
282
283sub revisions {
284
10
1
66
    my $self = shift;
285
286
10
28
    my @r;
287
10
1501
10
26
4781
62
    foreach my $r_id (sort {$a <=> $b} keys %{ $self->_revisions }) {
288
304
851
        push @r, Mediawiki::Blame::Revision->_new(
289            $r_id,
290
304
687
            @{ $self->_revisions->{$r_id} } # 3 elements
291        );
292    };
293
294
10
494
    return @r;
295};
296
297sub blame {
298
10
1
258
    my $self = shift;
299
300    validate_with(
301        params => \@_,
302        on_fail => sub {
303
2
15
            chomp (my $p = shift);
304
2
13
            croak $p;
305        },
306        spec => {
307            revision => {
308                optional => 1,
309                callbacks => {
310                    'is a valid r_id' => sub {
311
8
63
                        return exists $self->_revisions->{shift()};
312                    },
313                },
314            },
315        },
316
10
55
    );
317
318
8
240
    my %P = @_; # params as hash
319
320
8
2979
8
22
11668
34
    my @r_ids = sort {$a <=> $b} keys %{ $self->_revisions };
321
8
341
    my $last_r_id;
322
8
52
    if ($P{revision}) {
323
6
25
        $last_r_id = $P{revision};
324    } else {
325
2
8
        $last_r_id = $r_ids[-1];
326    };
327
328
8
83
    my $ann = Algorithm::Annotate->new;
329
8
592
241
1274
    for my $r_id (grep {$_ <= $last_r_id} @r_ids) {
330
166
637138
        $ann->add(
331            $r_id,
332            $self->_revisions->{$r_id}[2] # text
333        );
334    };
335
336
8
8
3523
32
    my @last_revision_text = @{ $self->_revisions->{$last_r_id}[2] };
337
8
26
    my $first_revision = $r_ids[0];
338
339
27
3731
    return map {
340
8
66
        my $id = $ann->result->[$_];
341
27
513
        if ($id == $first_revision and $id != $self->_initial) {
342
2
9
            Mediawiki::Blame::Line->_new(
343                undef,
344                $self->_revisions->{$id}->[0],
345                undef,
346                $last_revision_text[$_],
347            );
348        } else {
349
25
3663
            Mediawiki::Blame::Line->_new(
350                $id,
351                $self->_revisions->{$id}->[0],
352                $self->_revisions->{$id}->[1],
353                $last_revision_text[$_],
354            );
355        };
356    } 0..$#last_revision_text;
357};
358
3591;