From 6d618ef70e5abfc5e18656ba8064ead0c8974386 Mon Sep 17 00:00:00 2001 From: Andrew Janke Date: Wed, 15 Jul 2015 02:56:48 -0400 Subject: emoji plugin: generate char map from Unicode UTR51 data files Refactor the functions and group listings to have fewer functions, but have them take group names as parameters. Pull group definitions in to a single associative array. --- plugins/emoji/update_emoji.pl | 113 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 plugins/emoji/update_emoji.pl (limited to 'plugins/emoji/update_emoji.pl') diff --git a/plugins/emoji/update_emoji.pl b/plugins/emoji/update_emoji.pl new file mode 100644 index 000000000..d90c6e316 --- /dev/null +++ b/plugins/emoji/update_emoji.pl @@ -0,0 +1,113 @@ +#!/usr/bin/perl -w +# +# update_emoji.pl +# +# This script generates the emoji.plugin.zsh emoji definitions from the Unicode +# character data for the emoji characters. +# +# The data file can be found at http://unicode.org/Public/emoji/latest/emoji-data.txt +# as referenced in Unicode TR51 (http://www.unicode.org/reports/tr51/index.html). +# +# This is known to work with the data file from version 1.0. It may not work with later +# versions if the format changes. In particular, this reads line comments to get the +# emoji character name and unicode version. +# +# Country names have punctuation and other non-letter characters removed from their name, +# to avoid possible complications with having to escape the strings when using them as +# array subscripts. The definition file seems to use some combining characters like accents +# that get stripped during this process. 
use strict;
use warnings;
use 5.010;
use autodie;

use Path::Class;
use File::Copy;

# Pattern for one non-comment line of the emoji data file. The fields are
# separated by ';', followed by a trailing line comment that carries the
# version tag, a parenthesised chunk of text, and the character name:
#
#   code points ; style ; level ; modifier status ; sources # V<ver> (...) name
#
# Only the fields the generator actually uses are captured.
my $EMOJI_DATA_LINE = qr{
    ^ (?<code> \S.*?\S )            \s* ; \s*   # space-separated code points
      \w+                           \s* ; \s*   # style (unused)
      \w+                           \s* ; \s*   # level (unused)
      (?<modifier_status> \w+ )     \s* ; \s*
      \w.*?                         \s*         # sources (unused)
      \# \s* V\S+ \s \( .*? \) \s*              # version tag and parenthesised text
      (?<name> \w.*\S )             \s* $
}x;

# Parse definitions out of the data file and convert them to zsh
# associative-array assignments.
#
# Parameters:
#   $infile      - path of the emoji data file to read
#   $outfilename - path of the zsh definition file to (re)generate
#
# The output is first written to "$outfilename.tmp" and then moved over
# $outfilename, so the target is only replaced after every line has been
# parsed and written.
#
# Returns 1 on success. Dies if a non-comment, non-blank line does not match
# the expected format, if the temp file cannot be closed cleanly, or if the
# final move fails.
sub process_emoji_data_file {
    my ( $infile, $outfilename ) = @_;

    my $outfilebase  = file($outfilename)->basename();
    my $tempfilename = "$outfilename.tmp";
    my $outfh        = file($tempfilename)->openw();
    $outfh->print("
# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.pl. Do not edit it manually.
#
# This contains the definition for:
# \$emoji - which maps character names to Unicode characters
# \$emoji_flags - maps country names to Unicode flag characters using region indicators

# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod

");

    my $fh       = file($infile)->openr();
    my $line_num = 0;
    LINE:
    # getline() is a method call, so Perl does not add the implicit defined()
    # test it adds for <$fh>; without it a final line of "0" would end the loop.
    while ( defined( my $line = $fh->getline() ) ) {
        $line_num++;

        # Skip all-comment lines (from the header) and blank lines.
        # (But don't strip comments on normal lines; we need to parse those
        # for the emoji names.)
        next LINE if $line =~ /^\s*#/ or $line =~ /^\s*$/;

        $line =~ $EMOJI_DATA_LINE
            or die "Failed parsing line $line_num: '$line'";

        # Copy the captures out immediately; later matches overwrite %+.
        my $code            = $+{code};
        my $modifier_status = $+{modifier_status};
        my $keycap_name     = $+{name};

        # Each code point becomes a \U escape inside a zsh $'...' string.
        my $zsh_code = join '', map { "\\U$_" } split /\s+/, $code;

        # Convert keycap names to valid associative array names that do not
        # require any quoting. Works fine for most stuff, but is clumsy for
        # flags.
        my $omz_name = lc($keycap_name);
        $omz_name =~ s/[^A-Za-z0-9]/_/g;

        if ( $keycap_name =~ /^flag for (\S.*?)\s*$/ ) {
            # Flags are keyed by country name with every non-letter replaced.
            my $zsh_flag_country = $1;
            $zsh_flag_country =~ s/[^\p{Letter}]/_/g;
            $outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'\n");
        } else {
            $outfh->print("emoji[$omz_name]=\$'$zsh_code'\n");
        }

        # Modifiers are included in both the main set and their separate map,
        # because they have a standalone representation as a color swatch.
        # This must be a string comparison: with '==' both operands numify to
        # 0 for any non-numeric status, which put every emoji into emoji_mod.
        if ( $modifier_status eq 'modifier' ) {
            $outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'\n");
        }
    }
    $fh->close();

    $outfh->print("\n");
    # Buffered write errors only surface at close, and autodie does not cover
    # method calls, so check explicitly before replacing the real file.
    $outfh->close()
        or die "Failed closing temp file $tempfilename: $!";

    move($tempfilename, $outfilename)
        or die "Failed moving temp file to $outfilename: $!";

    return 1;
}

my $datafile = "emoji-data.txt";
my $zsh_def_file = "emoji-char-definitions.zsh";
process_emoji_data_file($datafile, $zsh_def_file);

print "Updated definition file $zsh_def_file\n";