1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
use super::internal::*;
use super::*;
use std::str::Chars;

/// Reports whether `b` may begin a UTF-8 encoded character, i.e. whether it
/// is anything other than a continuation byte.
/// (extracted from `str::is_char_boundary`)
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes occupy exactly 0x80..=0xBF (128..=191); any other
    // value -- below 128 or from 192 upward -- starts a character.
    b < 0x80 || b >= 0xC0
}


/// `ParallelString` for borrowed string slices: the characters are exposed
/// through a `ParChars` that borrows the same data.
impl<'a> ParallelString for &'a str {
    type Chars = ParChars<'a>;

    /// Wraps the entire slice in a `ParChars`.
    fn par_chars(self) -> Self::Chars {
        let whole = self;
        ParChars { chars: whole }
    }
}


/// Iterator over the characters of a string slice, created by calling
/// `par_chars` on a `&str`.
///
/// The value only borrows the string, so cloning it is cheap.
#[derive(Debug, Clone)]
pub struct ParChars<'a> {
    /// The slice whose characters this value covers.
    chars: &'a str,
}

/// `ParChars` yields `char` items and has no known length up front, so it is
/// driven through the unindexed bridge.
impl<'a> ParallelIterator for ParChars<'a> {
    type Item = char;

    /// Hands this value, acting as the producer, together with `consumer` to
    /// `bridge_unindexed` and returns whatever that yields.
    fn drive_unindexed<C: UnindexedConsumer<Self::Item>>(self, consumer: C) -> C::Result {
        let producer = self;
        bridge_unindexed(producer, consumer)
    }
}

impl<'a> UnindexedProducer for ParChars<'a> {
    fn can_split(&self) -> bool {
        // This is pessimistic, as we only *know* there are multiple characters
        // when it's longer than Unicode's maximum UTF-8 length of 4.  There
        // could be smaller characters, but it's ok not to split maximally.
        self.chars.len() > 4
    }

    fn split(self) -> (Self, Self) {
        let mid = self.chars.len() / 2;

        // We want to split near the midpoint, but we need to find an actual
        // character boundary.  So we look at the raw bytes, first scanning
        // forward from the midpoint for a boundary, then trying backward.
        let (left, right) = self.chars.as_bytes().split_at(mid);
        let index = right.iter()
            .cloned()
            .position(is_char_boundary)
            .map(|i| mid + i)
            .or_else(|| left.iter().cloned().rposition(is_char_boundary))
            .unwrap_or(0);

        let (left, right) = self.chars.split_at(index);
        (ParChars { chars: left }, ParChars { chars: right })
    }
}

impl<'a> IntoIterator for ParChars<'a> {
    type Item = char;
    type IntoIter = Chars<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.chars.chars()
    }
}