diff options
Diffstat (limited to 'plugins/emoji/update_emoji.pl')
-rw-r--r-- | plugins/emoji/update_emoji.pl | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/plugins/emoji/update_emoji.pl b/plugins/emoji/update_emoji.pl new file mode 100644 index 000000000..d90c6e316 --- /dev/null +++ b/plugins/emoji/update_emoji.pl @@ -0,0 +1,113 @@ +#!/usr/bin/perl -w +# +# update_emoji.pl +# +# This script generates the emoji.plugin.zsh emoji definitions from the Unicode +# character data for the emoji characters. +# +# The data file can be found at http://unicode.org/Public/emoji/latest/emoji-data.txt +# as referenced in Unicode TR51 (http://www.unicode.org/reports/tr51/index.html). +# +# This is known to work with the data file from version 1.0. It may not work with later +# versions if the format changes. In particular, this reads line comments to get the +# emoji character name and unicode version. +# +# Country names have punctuation and other non-letter characters removed from their name, +# to avoid possible complications with having to escape the strings when using them as +# array subscripts. The definition file seems to use some combining characters like accents +# that get stripped during this process. + +use strict; +use warnings; +use 5.010; +use autodie; + +use Path::Class; +use File::Copy; + +# Parse definitions out of the data file and convert +sub process_emoji_data_file { + my ( $infile, $outfilename ) = @_; + my $file = file($infile); + my $outfile = file($outfilename); + my $outfilebase = $outfile->basename(); + my $tempfilename = "$outfilename.tmp"; + my $tempfile = file($tempfilename); + my $outfh = $tempfile->openw(); + $outfh->print(" +# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin +# +# This file is auto-generated by update_emoji.pl. Do not edit it manually. +# +# This contains the definition for: +# \$emoji - which maps character names to Unicode characters +# \$emoji_flags - maps country names to Unicode flag characters using region indicators + +# Main emoji +typeset -gAH emoji +# National flags +typeset -gAH emoji_flags +# Combining modifiers +typeset -gAH emoji_mod + +"); + + my $fh = $file->openr(); + my $line_num = 0; + while ( my $line = $fh->getline() ) { + $line_num++; + $_ = $line; + # Skip all-comment lines (from the header) and blank lines + # (But don't strip comments on normal lines; we need to parse those for + # the emoji names.) + next if /^\s*#/ or /^\s*$/; + + if (/^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/) { + my ($code, $style, $level, $modifier_status, $sources, $version, $keycap_name) + = ($1, $2, $3, $4, $5, $6, $7); + #print "code=$code style=$style level=$level modifier_status=$modifier_status sources=$sources version=$version name=$keycap_name\n"; + my @code_points = split /\s+/, $code; + my @sources = split /\s+/, $sources; + + my $flag_country = ""; + if ( $keycap_name =~ /^flag for (\S.*?)\s*$/) { + $flag_country = $1; + } + + my $zsh_code = join '', map { "\\U$_" } @code_points; + # Convert keycap names to valid associative array names that do not require any + # quoting. Works fine for most stuff, but is clumsy for flags. + my $omz_name = lc($keycap_name); + $omz_name =~ s/[^A-Za-z0-9]/_/g; + my $zsh_flag_country = $flag_country; + $zsh_flag_country =~ s/[^\p{Letter}]/_/g; + if ($flag_country) { + $outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'\n"); + } else { + $outfh->print("emoji[$omz_name]=\$'$zsh_code'\n"); + } + # Modifiers are included in both the main set and their separate map, + # because they have a standalone representation as a color swatch. + if ( $modifier_status == "modifier" ) { + $outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'\n"); + } + } else { + die "Failed parsing line $line_num: '$_'"; + } + } + $fh->close(); + $outfh->print("\n"); + $outfh->close(); + + move($tempfilename, $outfilename) + or die "Failed moving temp file to $outfilename: $!"; +} + +my $datafile = "emoji-data.txt"; +my $zsh_def_file = "emoji-char-definitions.zsh"; +process_emoji_data_file($datafile, $zsh_def_file); + +print "Updated definition file $zsh_def_file\n"; + + + |