summaryrefslogtreecommitdiff
path: root/plugins/emoji/update_emoji.pl
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/emoji/update_emoji.pl')
-rw-r--r--plugins/emoji/update_emoji.pl113
1 files changed, 113 insertions, 0 deletions
diff --git a/plugins/emoji/update_emoji.pl b/plugins/emoji/update_emoji.pl
new file mode 100644
index 000000000..d90c6e316
--- /dev/null
+++ b/plugins/emoji/update_emoji.pl
@@ -0,0 +1,113 @@
+#!/usr/bin/perl -w
+#
+# update_emoji.pl
+#
+# This script generates emoji-char-definitions.zsh, the emoji definitions for the
+# oh-my-zsh emoji plugin, from the Unicode character data for the emoji characters.
+#
+# The data file can be found at http://unicode.org/Public/emoji/latest/emoji-data.txt
+# as referenced in Unicode TR51 (http://www.unicode.org/reports/tr51/index.html).
+#
+# This is known to work with the data file from version 1.0. It may not work with later
+# versions if the format changes. In particular, this reads line comments to get the
+# emoji character name and unicode version.
+#
+# Country names have punctuation and other non-letter characters replaced with
+# underscores, to avoid possible complications with having to escape the strings when
+# using them as array subscripts. The definition file seems to use some combining
+# characters like accents, which are also replaced during this process.
+
+use strict;
+use warnings;
+use 5.010;
+use autodie;
+
+use Path::Class;
+use File::Copy;
+
+# Convert the Unicode emoji data file into zsh associative-array definitions.
+#
+# Arguments:
+#   $infile      - path of the Unicode emoji data file to read
+#   $outfilename - path of the zsh definition file to (re)generate
+#
+# Each data line becomes one assignment in the generated file:
+#   emoji_flags[<country>] for names of the form "flag for <country>",
+#   emoji[<name>] for every other name, and additionally
+#   emoji_mod[<name>] when the line's modifier status is "modifier".
+#
+# Output is written to "$outfilename.tmp" and moved into place only after the whole
+# input parsed, so a parse failure leaves any existing definition file untouched
+# (the .tmp file is left behind). Dies on the first data line that does not match.
+sub process_emoji_data_file {
+    my ( $infile, $outfilename ) = @_;
+    my $outfilebase  = file($outfilename)->basename();
+    my $tempfilename = "$outfilename.tmp";
+    my $outfh        = file($tempfilename)->openw();
+    $outfh->print("
+# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin
+#
+# This file is auto-generated by update_emoji.pl. Do not edit it manually.
+#
+# This contains the definition for:
+# \$emoji - which maps character names to Unicode characters
+# \$emoji_flags - maps country names to Unicode flag characters using region indicators
+
+# Main emoji
+typeset -gAH emoji
+# National flags
+typeset -gAH emoji_flags
+# Combining modifiers
+typeset -gAH emoji_mod
+
+");
+
+    my $fh       = file($infile)->openr();
+    my $line_num = 0;
+    while ( my $line = $fh->getline() ) {
+        $line_num++;
+        # Skip all-comment lines (from the header) and blank lines, but keep the
+        # trailing comments on data lines: they carry the version and emoji name.
+        next if $line =~ /^\s*#/ or $line =~ /^\s*$/;
+
+        # code points ; style ; level ; modifier status ; sources # V<version> (...) <name>
+        $line =~ /^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/
+            or die "Failed parsing line $line_num: '$line'";
+        my ( $code, $modifier_status, $keycap_name ) = ( $1, $4, $7 );
+        my $zsh_code = join '', map { "\\U$_" } split /\s+/, $code;
+
+        # Convert keycap names to valid associative array names that do not require any
+        # quoting. Works fine for most stuff, but is clumsy for flags.
+        ( my $omz_name = lc $keycap_name ) =~ s/[^A-Za-z0-9]/_/g;
+        my $flag_country = $keycap_name =~ /^flag for (\S.*?)\s*$/ ? $1 : "";
+        if ($flag_country) {
+            ( my $zsh_flag_country = $flag_country ) =~ s/[^\p{Letter}]/_/g;
+            $outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'\n");
+        } else {
+            $outfh->print("emoji[$omz_name]=\$'$zsh_code'\n");
+        }
+
+        # Modifiers are included in both the main set and their separate map,
+        # because they have a standalone representation as a color swatch.
+        # This must be a string comparison: under "==" non-numeric strings all
+        # numify to 0 and every character would land in emoji_mod.
+        if ( $modifier_status eq "modifier" ) {
+            $outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'\n");
+        }
+    }
+    $fh->close();
+    $outfh->print("\n");
+    $outfh->close();
+
+    move( $tempfilename, $outfilename )
+        or die "Failed moving temp file to $outfilename: $!";
+}
+
+# Both paths are relative, so they resolve against the directory the script is run from.
+my ( $unicode_data_path, $definitions_path ) = ( "emoji-data.txt", "emoji-char-definitions.zsh" );
+process_emoji_data_file( $unicode_data_path, $definitions_path );
+
+print "Updated definition file ", $definitions_path, "\n";
+
+
+