stevee

My wayland statusbar
git clone git://gtms.dev/stevee
Log | Files | Refs | README | LICENSE

xml.zig (10654B)


      1 const std = @import("std");
      2 const mem = std.mem;
      3 
      /// Pull-style tokenizer for a subset of XML held entirely in memory.
      ///
      /// Each call to `next` consumes the front of `document` and yields one
      /// `Event`. `next` returns null at end of input and also when it meets
      /// markup it cannot parse; the two cases are not distinguished.
      ///
      /// Event slices point into the original document, except character data
      /// produced from an entity reference, which points into `char_buffer` and
      /// is overwritten when the next entity is decoded.
      pub const Parser = struct {
          /// Unconsumed remainder of the input.
          document: []const u8,
          /// Name of the most recent open tag; reported as the close tag for `/>`.
          current_tag: []const u8 = undefined,
          /// Scratch space holding the UTF-8 encoding of the last decoded entity.
          char_buffer: [4]u8 = undefined,
          /// What kind of token `next` expects at the front of `document`.
          mode: enum { normal, attrs, chars, entity } = .normal,

          /// Creates a parser positioned at the start of `document`.
          pub fn init(document: []const u8) Parser {
              return Parser{
                  .document = document,
              };
          }

          /// Returns the next event, or null at end of input or on input that
          /// cannot be parsed.
          pub fn next(p: *Parser) ?Event {
              return switch (p.mode) {
                  .normal => p.nextNormal(),
                  .attrs => p.nextAttrs(),
                  .chars => p.nextChars(),
                  .entity => p.nextEntity(),
              };
          }

          /// Handles markup between tags: skips leading whitespace, then
          /// recognizes processing instructions, close tags, comments, CDATA
          /// sections and open tags. Anything that does not start with '<' is
          /// handed to `nextChars`.
          fn nextNormal(p: *Parser) ?Event {
              p.skipWhitespace();
              switch (p.peek(0) orelse return null) {
                  '<' => switch (p.peek(1) orelse return null) {
                      '?' => {
                          // `end` is relative to document[2..], i.e. just past "<?".
                          if (mem.indexOf(u8, p.document[2..], "?>")) |end| {
                              const ev = Event{ .processing_instruction = p.document[2 .. end + 2] };
                              p.document = p.document[end + 4 ..];
                              return ev;
                          }
                      },
                      '/' => switch (p.peek(2) orelse return null) {
                          ':', 'A'...'Z', '_', 'a'...'z' => {
                              // `end` is relative to document[3..]; the name starts at 2.
                              if (mem.indexOfScalar(u8, p.document[3..], '>')) |end| {
                                  const ev = Event{ .close_tag = p.document[2 .. end + 3] };
                                  p.document = p.document[end + 4 ..];
                                  return ev;
                              }
                          },
                          else => {},
                      },
                      '!' => switch (p.peek(2) orelse return null) {
                          '-' => if ((p.peek(3) orelse return null) == '-') {
                              // Search only after the full "<!--" opener. Starting
                              // inside it would let "<!--->" match its own dashes
                              // and produce a slice whose start is past its end.
                              if (mem.indexOf(u8, p.document[4..], "-->")) |end| {
                                  const ev = Event{ .comment = p.document[4 .. end + 4] };
                                  p.document = p.document[end + 7 ..];
                                  return ev;
                              }
                          },
                          '[' => if (mem.startsWith(u8, p.document[3..], "CDATA[")) {
                              // "<![CDATA[" is 9 bytes long; content starts right after.
                              if (mem.indexOf(u8, p.document, "]]>")) |end| {
                                  const ev = Event{ .character_data = p.document[9..end] };
                                  p.document = p.document[end + 3 ..];
                                  return ev;
                              }
                          },
                          else => {},
                      },
                      ':', 'A'...'Z', '_', 'a'...'z' => {
                          // A start tag that is never closed is treated as unparseable.
                          if (mem.indexOfScalar(u8, p.document, '>') == null) return null;

                          // The name ends at the first whitespace byte, '/' or '>'.
                          // Scanning forward (instead of searching the whole tag for
                          // ' ' and then '/') handles tab/newline separators and
                          // keeps '/' inside attribute values from ending the name.
                          // The '>' found above guarantees the loop breaks.
                          var end: usize = 1;
                          while (end < p.document.len) : (end += 1) {
                              switch (p.document[end]) {
                                  ' ', '\t', '\n', '\r', '/', '>' => break,
                                  else => {},
                              }
                          }

                          const ev = Event{ .open_tag = p.document[1..end] };
                          p.current_tag = ev.open_tag;
                          // Leave the terminator in place; `nextAttrs` consumes it.
                          p.document = p.document[end..];
                          p.mode = .attrs;
                          return ev;
                      },
                      else => {},
                  },
                  else => {
                      p.mode = .chars;
                      return p.nextChars();
                  },
              }
              return null;
          }

          /// Handles the inside of a start tag after its name: yields one
          /// attribute per call, a synthesized close tag for `/>`, or, on `>`,
          /// switches back to normal mode and returns whatever follows the tag.
          fn nextAttrs(p: *Parser) ?Event {
              p.skipWhitespace();
              switch (p.peek(0) orelse return null) {
                  '>' => {
                      p.document = p.document[1..];
                      p.mode = .normal;
                      return p.nextNormal();
                  },
                  '/' => {
                      // Self-closing element: report a close tag for the tag
                      // that was just opened.
                      const ev = Event{ .close_tag = p.current_tag };
                      if ((p.peek(1) orelse return null) != '>')
                          return null;
                      p.document = p.document[2..];
                      p.mode = .normal;
                      return ev;
                  },
                  else => {},
              }

              // Attribute name: the longest run of name characters.
              var i: usize = 0;
              while (isNameChar(p.peek(i) orelse return null)) : (i += 1) {}
              const name = p.document[0..i];

              p.document = p.document[i..];
              p.skipWhitespace();

              if ((p.peek(0) orelse return null) != '=')
                  return null;

              p.document = p.document[1..];
              p.skipWhitespace();

              // The value must be quoted; it ends at the next matching quote.
              const c = p.peek(0) orelse return null;
              switch (c) {
                  '\'', '"' => {
                      if (mem.indexOfScalar(u8, p.document[1..], c)) |end| {
                          const ev = Event{
                              .attribute = .{
                                  .name = name,
                                  .raw_value = p.document[1 .. end + 1],
                              },
                          };
                          p.document = p.document[end + 2 ..];
                          return ev;
                      }
                  },
                  else => {},
              }

              return null;
          }

          /// Yields the text up to (not including) the next '<' or '&' and
          /// selects the mode for that delimiter. The yielded slice may be empty
          /// when the delimiter is the very first byte. Returns null, without
          /// emitting the pending text, if the input ends before a delimiter.
          fn nextChars(p: *Parser) ?Event {
              var i: usize = 0;
              while (p.peek(i)) |c| : (i += 1) {
                  if (c != '<' and c != '&') continue;

                  const ev = Event{ .character_data = p.document[0..i] };
                  p.document = p.document[i..];
                  p.mode = if (c == '<') .normal else .entity;
                  return ev;
              }
              return null;
          }

          /// Decodes the entity reference at the start of `s`, where `s` begins
          /// just after the '&'. Supports lt, gt, amp, apos, quot, `#NNN` and
          /// `#xHHH`. On success stores the code point in `buf` as a
          /// native-endian u32 and returns the index of the terminating ';'
          /// within `s`. Returns null if there is no ';' or the entity is not
          /// recognized.
          fn parseEntity(s: []const u8, buf: *[4]u8) ?usize {
              const semi = mem.indexOfScalar(u8, s, ';') orelse return null;
              const entity = s[0..semi];

              const codepoint: u32 = if (mem.eql(u8, entity, "lt"))
                  '<'
              else if (mem.eql(u8, entity, "gt"))
                  '>'
              else if (mem.eql(u8, entity, "amp"))
                  '&'
              else if (mem.eql(u8, entity, "apos"))
                  '\''
              else if (mem.eql(u8, entity, "quot"))
                  '"'
              else if (mem.startsWith(u8, entity, "#x"))
                  std.fmt.parseInt(u21, entity[2..], 16) catch return null
              else if (mem.startsWith(u8, entity, "#"))
                  std.fmt.parseInt(u21, entity[1..], 10) catch return null
              else
                  return null;

              buf.* = mem.toBytes(codepoint);
              return semi;
          }

          /// Decodes the entity reference at the front of the document and
          /// yields it as UTF-8 character data stored in `char_buffer`.
          fn nextEntity(p: *Parser) ?Event {
              if ((p.peek(0) orelse return null) != '&')
                  return null;

              if (parseEntity(p.document[1..], &p.char_buffer)) |semi| {
                  // Read the code point out before the same buffer is reused
                  // for the UTF-8 encoding.
                  const codepoint = mem.bytesToValue(u32, &p.char_buffer);
                  const n = std.unicode.utf8Encode(@intCast(codepoint), &p.char_buffer) catch return null;
                  // Skip '&', the entity text and ';'.
                  p.document = p.document[semi + 2 ..];
                  p.mode = .chars;
                  return Event{ .character_data = p.char_buffer[0..n] };
              }

              return null;
          }

          /// Reports whether `c` may appear in an attribute name (ASCII only).
          fn isNameChar(c: u8) bool {
              return switch (c) {
                  ':', 'A'...'Z', '_', 'a'...'z', '-', '.', '0'...'9' => true,
                  else => false,
              };
          }

          /// Drops leading spaces, tabs, line feeds and carriage returns.
          fn skipWhitespace(p: *Parser) void {
              while (p.peek(0)) |c| {
                  switch (c) {
                      ' ', '\t', '\n', '\r' => p.document = p.document[1..],
                      else => return,
                  }
              }
          }

          /// Returns the byte `n` positions ahead, or null past the end of input.
          fn peek(p: *const Parser, n: usize) ?u8 {
              if (p.document.len <= n)
                  return null;

              return p.document[n];
          }
      };
    236 
    /// One item produced by `Parser.next`. The payload slices are produced by
    /// the parser and are not copied.
    237 pub const Event = union(enum) {
            /// Name taken from a start tag (the text right after '<').
    238     open_tag: []const u8,
            /// Name taken from `</name>`, or the current tag's name when the
            /// parser meets `/>` inside a start tag.
    239     close_tag: []const u8,
            /// One `name="value"` pair from a start tag; the value is still raw
            /// (entity references are not decoded yet).
    240     attribute: Attribute,
            /// Text of a `<!-- ... -->` comment.
    241     comment: []const u8,
            /// Text found between `<?` and `?>`.
    242     processing_instruction: []const u8,
            /// A run of text, the contents of a CDATA section, or a single
            /// decoded entity reference.
    243     character_data: []const u8,
    244 };
    245 
    /// A single attribute from a start tag.
    ///
    /// `raw_value` is the text between the quotes, with entity references still
    /// encoded. `next` walks that text and yields decoded fragments; the other
    /// methods work on a private copy, so they do not consume the attribute.
    pub const Attribute = struct {
        name: []const u8,
        raw_value: []const u8,
        /// Scratch space for the UTF-8 encoding of a decoded entity reference.
        char_buffer: [4]u8 = undefined,

        /// Returns a newly allocated copy of the decoded value; the caller owns
        /// the returned slice. Decoding stops silently at the first entity
        /// reference that cannot be decoded, so the result may be truncated.
        pub fn dupeValue(attr: Attribute, allocator: mem.Allocator) error{OutOfMemory}![]u8 {
            var list = std.ArrayList(u8).init(allocator);
            errdefer list.deinit();
            var attr_copy = attr;
            while (attr_copy.next()) |fragment|
                try list.appendSlice(fragment);
            return list.toOwnedSlice();
        }

        /// Reports whether the decoded value begins with `prefix`. An empty
        /// prefix always matches. Returns false if decoding stops (end of value
        /// or undecodable entity) before the whole prefix has been matched.
        pub fn valueStartsWith(attr: Attribute, prefix: []const u8) bool {
            var attr_copy = attr;
            var rest = prefix;

            // Consume the prefix piece by piece: a fragment may be shorter than
            // what is left of the prefix (it then has to match entirely) or
            // longer (only its leading part has to match).
            while (rest.len > 0) {
                const fragment = attr_copy.next() orelse return false;
                const n = @min(fragment.len, rest.len);
                if (!mem.eql(u8, fragment[0..n], rest[0..n]))
                    return false;
                rest = rest[n..];
            }

            return true;
        }

        /// Reports whether the decoded value is exactly `value`. Returns false
        /// if part of the raw value could not be decoded.
        pub fn valueEql(attr: Attribute, value: []const u8) bool {
            var attr_copy = attr;
            var i: usize = 0;

            while (attr_copy.next()) |fragment| {
                if (!mem.startsWith(u8, value[i..], fragment))
                    return false;
                i += fragment.len;
            }

            // `next` also returns null when it hits an entity it cannot decode;
            // leftover raw text means the value was not fully compared.
            return i == value.len and attr_copy.raw_value.len == 0;
        }

        /// Consumes and returns the next decoded fragment of the value: either
        /// the text up to the next '&', or one decoded entity reference. Returns
        /// null when the value is exhausted or an entity cannot be decoded.
        /// A fragment produced from an entity points into `char_buffer` and is
        /// only valid until the next call.
        pub fn next(attr: *Attribute) ?[]const u8 {
            if (attr.raw_value.len == 0)
                return null;

            if (attr.raw_value[0] == '&') {
                const semi = Parser.parseEntity(attr.raw_value[1..], &attr.char_buffer) orelse return null;
                // Read the code point out before the same buffer is reused for
                // the UTF-8 encoding.
                const codepoint = mem.bytesToValue(u32, &attr.char_buffer);
                const n = std.unicode.utf8Encode(@intCast(codepoint), &attr.char_buffer) catch return null;
                // Skip '&', the entity text and ';'.
                attr.raw_value = attr.raw_value[semi + 2 ..];
                return attr.char_buffer[0..n];
            }

            const end = mem.indexOfScalar(u8, attr.raw_value, '&') orelse attr.raw_value.len;
            const ret = attr.raw_value[0..end];
            attr.raw_value = attr.raw_value[end..];
            return ret;
        }
    };