summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/functions.zsh134
-rw-r--r--lib/termsupport.zsh33
2 files changed, 135 insertions, 32 deletions
diff --git a/lib/functions.zsh b/lib/functions.zsh
index 17f5f9cbf..5c1a5a283 100644
--- a/lib/functions.zsh
+++ b/lib/functions.zsh
@@ -73,3 +73,137 @@ function env_default() {
env | grep -q "^$1=" && return 0
export "$1=$2" && return 3
}
+
+
+# Required for $langinfo
+zmodload zsh/langinfo
+
+# URL-encode a string
+#
+# Encodes a string using RFC 2396 URL-encoding (%-escaped).
+# See: https://www.ietf.org/rfc/rfc2396.txt
+#
+# By default, reserved characters and unreserved "mark" characters are
+# not escaped by this function. This allows the common usage of passing
+# an entire URL in, and encoding just special characters in it, with
+# the expectation that reserved and mark characters are used appropriately.
+# The -r and -m options turn on escaping of the reserved and mark characters,
+# respectively, which allows arbitrary strings to be fully escaped for
+# embedding inside URLs, where reserved characters might be misinterpreted.
+#
+# Prints the encoded string on stdout.
+# Returns nonzero if encoding failed.
+#
+# Usage:
+# omz_urlencode [-r] [-m] <string>
+#
+# -r causes reserved characters (;/?:@&=+$,) to be escaped
+#
+# -m causes "mark" characters (_.!~*''()-) to be escaped
+#
+# -P causes spaces to be encoded as '%20' instead of '+'
+function omz_urlencode() {
+ emulate -L zsh
+ zparseopts -D -E -a opts r m P
+
+ local in_str=$1
+ local url_str=""
+ local spaces_as_plus
+ if [[ -z $opts[(r)-P] ]]; then spaces_as_plus=1; fi
+ local str="$in_str"
+
+ # URLs must use UTF-8 encoding; convert str to UTF-8 if required
+ local encoding=$langinfo[CODESET]
+ local safe_encodings
+ safe_encodings=(UTF-8 utf8 US-ASCII)
+ if [[ -z ${safe_encodings[(r)$encoding]} ]]; then
+ str=$(echo -E "$str" | iconv -f $encoding -t UTF-8)
+ if [[ $? != 0 ]]; then
+ echo "Error converting string from $encoding to UTF-8" >&2
+ return 1
+ fi
+ fi
+
+ # Use LC_CTYPE=C to process text byte-by-byte
+ local i byte ord LC_ALL=C
+ export LC_ALL
+ local reserved=';/?:@&=+$,'
+ local mark='_.!~*''()-'
+ local dont_escape="[A-Za-z0-9"
+ if [[ -z $opts[(r)-r] ]]; then
+ dont_escape+=$reserved
+ fi
+ # $mark must be last because of the "-"
+ if [[ -z $opts[(r)-m] ]]; then
+ dont_escape+=$mark
+ fi
+ dont_escape+="]"
+
+ # Implemented to use a single printf call and avoid subshells in the loop,
+ # for performance (primarily on Windows).
+ local url_str=""
+ for (( i = 1; i <= ${#str}; ++i )); do
+ byte="$str[i]"
+ if [[ "$byte" =~ "$dont_escape" ]]; then
+ url_str+="$byte"
+ else
+ if [[ "$byte" == " " && -n $spaces_as_plus ]]; then
+ url_str+="+"
+ else
+ ord=$(( [##16] #byte ))
+ url_str+="%$ord"
+ fi
+ fi
+ done
+ echo -E "$url_str"
+}
+
+# URL-decode a string
+#
+# Decodes a RFC 2396 URL-encoded (%-escaped) string.
+# This decodes the '+' and '%' escapes in the input string, and leaves
+# other characters unchanged. Does not enforce that the input is a
+# valid URL-encoded string. This is a convenience to allow callers to
+# pass in a full URL or similar strings and decode them for human
+# presentation.
+#
+# Outputs the encoded string on stdout.
+# Returns nonzero if encoding failed.
+#
+# Usage:
+# omz_urldecode <urlstring> - prints decoded string followed by a newline
+function omz_urldecode {
+ emulate -L zsh
+ local encoded_url=$1
+
+ echo -e input $1
+ # Work bytewise, since URLs escape UTF-8 octets
+ local caller_encoding=$langinfo[CODESET]
+ local LC_ALL=C
+ export LC_ALL
+
+ # Change + back to ' '
+ local tmp=${encoded_url:gs/+/ /}
+ # Protect other escapes to pass through the printf unchanged
+ tmp=${tmp:gs/\\/\\\\/}
+ # Handle %-escapes by turning them into `\xXX` printf escapes
+ tmp=${tmp:gs/%/\\x/}
+ echo -E "before decode $tmp"
+ local decoded
+ eval "decoded=\$'$tmp'"
+
+ # Now we have a UTF-8 encoded string in the variable. We need to re-encode
+ # it if caller is in a non-UTF-8 locale.
+ local safe_encodings
+ safe_encodings=(UTF-8 utf8 US-ASCII)
+ if [[ -z ${safe_encodings[(r)$caller_encoding]} ]]; then
+ decoded=$(echo -E "$decoded" | iconv -f UTF-8 -t $caller_encoding)
+ if [[ $? != 0 ]]; then
+ echo "Error converting string from UTF-8 to $caller_encoding" >&2
+ return 1
+ fi
+ fi
+
+ echo -E "$decoded"
+}
+
diff --git a/lib/termsupport.zsh b/lib/termsupport.zsh
index 52622f5ab..726cdce41 100644
--- a/lib/termsupport.zsh
+++ b/lib/termsupport.zsh
@@ -59,44 +59,13 @@ preexec_functions+=(omz_termsupport_preexec)
if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then
- # URL-encodes a string
- # Outputs the encoded string on stdout
- # Returns nonzero if encoding failed
- function _omz_urlencode() {
- local str=$1
- local url_str=""
-
- # URLs must use UTF-8 encoding; convert if required
- local encoding=${LC_CTYPE/*./}
- if [[ -n $encoding && $encoding != UTF-8 && $encoding != utf8 ]]; then
- str=$(echo $str | iconv -f $encoding -t UTF-8)
- if [[ $? != 0 ]]; then
- echo "Error converting string from $encoding to UTF-8" >&2
- return 1
- fi
- fi
-
- # Use LC_CTYPE=C to process text byte-by-byte
- local i ch hexch LC_CTYPE=C
- for ((i = 1; i <= ${#str}; ++i)); do
- ch="$str[i]"
- if [[ "$ch" =~ [/._~A-Za-z0-9-] ]]; then
- url_str+="$ch"
- else
- hexch=$(printf "%02X" "'$ch")
- url_str+="%$hexch"
- fi
- done
- echo $url_str
- }
-
# Emits the control sequence to notify Terminal.app of the cwd
function update_terminalapp_cwd() {
# Identify the directory using a "file:" scheme URL, including
# the host name to disambiguate local vs. remote paths.
# Percent-encode the pathname.
- local URL_PATH=$(_omz_urlencode $PWD)
+ local URL_PATH=$(omz_urlencode -P $PWD)
[[ $? != 0 ]] && return 1
local PWD_URL="file://$HOST$URL_PATH"
# Undocumented Terminal.app-specific control sequence