Differences between revisions 19 and 20
Revision 19 as of 2009-10-20 04:16:18
Size: 1901
Editor: yitz
Comment: Attempting to fix missing title. Removed blank line
Revision 20 as of 2012-01-06 23:52:27
Size: 6245
Editor: dethrophes
Comment: I've run into a lot of problems with these kinds of functions, so I have created a more robust version.
Deletions are marked like this. Additions are marked like this.
Line 14: Line 14:
   # POSIX
   # chr() - converts decimal value to its ASCII character representation
   # ord() - converts ASCII character to its decimal value
 ###############################################################
 #
 # Note about Ext Ascii and UTF-8 encoding
 #
 # for values 0x00 - 0x7f identical
 # for values 0x80 - 0xff conflict between UTF-8 & ExtAscii
 # for values 0x100 -0x7FFFFFFF Only UTF-8 UTF-32
 #
 # value EAscii UTF-8 UTF-16 UTF-32
 # 0x20 "\x20" "\x20" \u0020 \U00000020
 # 0x7f "\x7f" "\x7f" \u007f \U0000007f
 # 0x80 "\x80" "\xc2\x80" \u0080 \U00000080
 # 0xff "\xff" "\xc3\xbf" \u00ff \U000000ff
 # 0x100 N/A "\xc4\x80" \u0100 \U00000100
 # 0x1000 N/A "\xe1\x80\x80" \u1000 \U00001000
 # 0xffff N/A "\xef\xbf\xbf" \uffff \U0000ffff
 # 0x10000 N/A "\xf0\x90\x80\x80" N/A \U00010000
 # 0xfffff N/A "\xf3\xbf\xbf\xbf" N/A \U000fffff
 # 0x10000000 N/A "\xfc\x90\x80\x80\x80\x80" N/A \U10000000
 # 0x7fffffff N/A "\xfd\xbf\xbf\xbf\xbf\xbf" N/A \U7fffffff
 # 0x80000000 N/A N/A N/A N/A
 # 0xffffffff N/A N/A N/A N/A
Line 18: Line 37:
   chr() {
     printf \\$(printf '%03o' $1)
   }
  
   #Another version doing the octal conversion with arithmetic
   # faster as it avoids a subshell
  chr () {
    printf \\$(($1/64*100+$1%64/8*10+$1%8))
   }
 ###########################################################################
 ## ord family
 ###########################################################################
 # ord <Return Variable Name> <Char to convert> [Optional Format String]
 # ord.hex <Return Variable Name> <Char to convert>
 # ord.oct <Return Variable Name> <Char to convert>
 # ord.utf8 <Return Variable Name> <Char to convert> [Optional Format String]
 # ord.eascii <Return Variable Name> <Char to convert> [Optional Format String]
 # ord.echo <Char to convert> [Optional Format String]
 # ord.hex.echo <Char to convert>
 # ord.oct.echo <Char to convert>
 # ord.utf8.echo <Char to convert> [Optional Format String]
 # ord.eascii.echo <Char to convert> [Optional Format String]
 #
 # Description:
 # converts character using native encoding to its decimal value and stores
 # it in the Variable specified
 #
 # ord
 # ord.hex output in hex
 # ord.oct output in octal
 # ord.utf8 forces UTF8 decoding
 # ord.eascii forces eascii decoding
 # ord.echo prints to stdout
 function ord {
  printf -v "${1?Missing Dest Variable}" "${3:-%d}" "'${2?Missing Char}"
 }
 function ord.oct {
  ord "${@:1:2}" "%c"
 }
 function ord.hex {
  ord "${@:1:2}" "%x"
 }
 function ord.utf8 {
  LC_CTYPE=en_US.UTF8 ord "${@}"
 }
 function ord.eascii {
  LC_CTYPE=C ord "${@}"
 }
 function ord.echo {
  printf "${2:-%d}" "'${1?Missing Char}"
 }
 function ord.oct.echo {
  ord.echo "${@:1:1}" "%o"
 }
 function ord.hex.echo {
  ord.echo "${@:1:1}" "%x"
 }
 function ord.utf8.echo {
  LC_CTYPE=en_US.UTF8 ord.echo "${@}"
 }
 function ord.eascii.echo {
  LC_CTYPE=C ord.echo "${@}"
 }
Line 28: Line 92:
   ord() {
     printf '%d' "'$1"
   }

   # hex() - converts ASCII character to a hexadecimal value
   # unhex() - converts a hexadecimal value to an ASCII character

   hex() {
      printf '%x' "'$1"
   }

   unhex() {
      printf \\x"$1"
   }

   # examples:

   chr $(ord A) # -> A
   ord $(chr 65) # -> 65
   ###########################################################################
   ## chr family
   ###########################################################################
 # chr.utf8 <Return Variable Name> <Integer to convert>
 # chr.eascii <Return Variable Name> <Integer to convert>
 # chr <Return Variable Name> <Integer to convert>
 # chr.oct <Return Variable Name> <Octal number to convert>
 # chr.hex <Return Variable Name> <Hex number to convert>
 # chr.utf8.echo <Integer to convert>
 # chr.eascii.echo <Integer to convert>
 # chr.echo <Integer to convert>
 # chr.oct.echo <Octal number to convert>
 # chr.hex.echo <Hex number to convert>
 #
 # Description:
 # converts decimal value to character representation and stores
 # it in the Variable specified
 #
 # chr Tries to guess output format
 # chr.utf8 forces UTF8 encoding
 # chr.eascii forces eascii encoding
 # chr.echo prints to stdout
 #
 function chr.utf8 {
  local val
  printf -v ${val} '\\U%08x' "${2?Missing Value}"
  printf -v ${1?Missing Dest Variable} ${val}
 }
 function chr.eascii {
  local val
  [ ${1} -lt 0x100 ] || return 1
  printf -v ${val} '\\x%02x' "${2?Missing Value}"
  printf -v ${1?Missing Dest Variable} ${val}
 }
 function chr {
  if [ ${1} -lt 0x80 ]; then
   chr.eascii "${@}"
  elif [ ${1} -lt 0x100 ]; then
   if [ "${LC_CTYPE:-${LC_ALL:-}}" = "C" ]; then
    chr.eascii "${@}"
   else
    chr.utf8 "${@}"
   fi
  else
   chr.utf8 "${@}"
  fi
 }
 function chr.oct {
      chr "${1}" "0${2}"
 }
 function chr.hex {
      chr "${1}" "0x${2}"
 }
 function chr.utf8.echo {
  local val
  printf -v ${val} "%08x" "${1?Missing Value}"
  printf \\U${val}
 }
 function chr.eascii.echo {
  local val
  printf -v ${val} "%02x" "${1?Missing Value}"
  printf \\x${val}
 }
 function chr.echo {
  if [ ${1} -lt 0x80 ]; then
   chr.eascii.echo "${@}"
  elif [ ${1} -lt 0x100 ]; then
   if [ "${LC_CTYPE:-${LC_ALL:-}}" = "C" ]; then
    chr.eascii.echo "${@}"
   else
    chr.utf8.echo "${@}"
   fi
  else
   chr.utf8.echo "${@}"
  fi
 }
 function chr.oct.echo {
      chr.echo "${1}" "0${2}"
 }
 function chr.hex.echo {
      chr.echo "${1}" "0x${2}"
 }
   chr.echo $(ord.echo A) # -> A
   ord.echo $(chr.echo 65) # -> 65

How do I convert an ASCII character to its decimal (or hexadecimal) value and back?

If you have a known octal or hexadecimal value (at script-writing time), you can just use printf:

   # POSIX
   # Prints the same character twice, once through the hexadecimal escape
   # \x27 and once through the octal escape \047, followed by a newline.
   printf '\x27\047\n'

This prints two literal ' characters (27 is the hexadecimal ASCII value of the character, and 47 is the octal value) and a newline.

If you need to convert characters (or numeric ASCII values) that are not known in advance (i.e., in variables), you can use something a little more complicated:

        ###############################################################
        #
        #       Note about Ext Ascii and UTF-8 encoding
        #
        # for values 0x00 - 0x7f identical
        # for values 0x80 - 0xff conflict between UTF-8 & ExtAscii
        # for values 0x100 -0x7FFFFFFF Only UTF-8 UTF-32
        #
        # value         EAscii   UTF-8                          UTF-16  UTF-32
        # 0x20          "\x20"  "\x20"                          \u0020  \U00000020
        # 0x7f          "\x7f"  "\x7f"                          \u007f  \U0000007f
        # 0x80          "\x80"  "\xc2\x80"                      \u0080  \U00000080
        # 0xff          "\xff"  "\xc3\xbf"                      \u00ff  \U000000ff
        # 0x100         N/A     "\xc4\x80"                      \u0100  \U00000100
        # 0x1000        N/A     "\xe1\x80\x80"                  \u1000  \U00001000
        # 0xffff        N/A     "\xef\xbf\xbf"                  \uffff  \U0000ffff
        # 0x10000       N/A     "\xf0\x90\x80\x80"              N/A     \U00010000
        # 0xfffff       N/A     "\xf3\xbf\xbf\xbf"              N/A     \U000fffff
        # 0x10000000    N/A     "\xfc\x90\x80\x80\x80\x80"      N/A     \U10000000
        # 0x7fffffff    N/A     "\xfd\xbf\xbf\xbf\xbf\xbf"      N/A     \U7fffffff
        # 0x80000000    N/A     N/A                             N/A     N/A
        # 0xffffffff    N/A     N/A                             N/A     N/A

        ###########################################################################
        ## ord family
        ###########################################################################
        # ord        <Return Variable Name> <Char to convert> [Optional Format String]
        # ord.hex    <Return Variable Name> <Char to convert>
        # ord.oct    <Return Variable Name> <Char to convert>
        # ord.utf8   <Return Variable Name> <Char to convert> [Optional Format String]
        # ord.eascii <Return Variable Name> <Char to convert> [Optional Format String]
        # ord.echo                      <Char to convert> [Optional Format String]
        # ord.hex.echo                  <Char to convert>
        # ord.oct.echo                  <Char to convert>
        # ord.utf8.echo                 <Char to convert> [Optional Format String]
        # ord.eascii.echo               <Char to convert> [Optional Format String]
        #
        # Description:
        # converts character using native encoding to its decimal value and stores
        # it in the Variable specified
        #
        #       ord
        #       ord.hex         output in hex
        #       ord.oct         output in octal
        #       ord.utf8        forces UTF8 decoding
        #       ord.eascii      forces eascii decoding
        #       ord.echo        prints to stdout
        # ord <dest-var> <char> [format]
        # Stores the numeric code of <char> in <dest-var>, rendered with the
        # given printf format (default %d). The leading single quote on the
        # argument makes printf use the code of the character after it.
        function ord {
                : "${1?Missing Dest Variable}" "${2?Missing Char}"
                printf -v "$1" "${3:-%d}" "'$2"
        }
        # ord.oct <dest-var> <char>
        # Stores the octal code of <char> in <dest-var> by calling ord with
        # the %o format. Both arguments are checked here so that a missing
        # <char> cannot let the format slide into the character position.
        function ord.oct {
                ord "${1?Missing Dest Variable}" "${2?Missing Char}" "%o"
        }
        # ord.hex <dest-var> <char>
        # Stores the hexadecimal code of <char> in <dest-var> by calling ord
        # with the %x format. Both arguments are checked here so that a missing
        # <char> cannot let the format slide into the character position.
        function ord.hex {
                ord "${1?Missing Dest Variable}" "${2?Missing Char}" "%x"
        }
        # ord.utf8 <dest-var> <char> [format]
        # Calls ord with LC_CTYPE set to en_US.UTF8 for the duration of the
        # call, with the intent that <char> is decoded as UTF-8.
        # NOTE(review): an LC_ALL already present in the environment would
        # take precedence over LC_CTYPE - confirm callers do not rely on it.
        function ord.utf8 {
                LC_CTYPE=en_US.UTF8 ord "$@"
        }
        # ord.eascii <dest-var> <char> [format]
        # Calls ord with LC_CTYPE set to C for the duration of the call, with
        # the intent that <char> is decoded as a single byte.
        # NOTE(review): an LC_ALL already present in the environment would
        # take precedence over LC_CTYPE - confirm callers do not rely on it.
        function ord.eascii {
                LC_CTYPE=C ord "$@"
        }
        # ord.echo <char> [format]
        # Prints the numeric code of <char> to stdout using the given printf
        # format (default %d); no trailing newline is written.
        function ord.echo {
                : "${1?Missing Char}"
                printf "${2:-%d}" "'$1"
        }
        # ord.oct.echo <char>
        # Prints the octal code of <char> to stdout via ord.echo. The argument
        # is checked here so that, when it is missing, the %o format is not
        # mistaken for the character to convert.
        function ord.oct.echo {
                ord.echo "${1?Missing Char}" "%o"
        }
        # ord.hex.echo <char>
        # Prints the hexadecimal code of <char> to stdout via ord.echo. The
        # argument is checked here so that, when it is missing, the %x format
        # is not mistaken for the character to convert.
        function ord.hex.echo {
                ord.echo "${1?Missing Char}" "%x"
        }
        # ord.utf8.echo <char> [format]
        # Calls ord.echo with LC_CTYPE set to en_US.UTF8 for the duration of
        # the call, with the intent that <char> is decoded as UTF-8.
        # NOTE(review): an LC_ALL already present in the environment would
        # take precedence over LC_CTYPE - confirm callers do not rely on it.
        function ord.utf8.echo {
                LC_CTYPE=en_US.UTF8 ord.echo "$@"
        }
        # ord.eascii.echo <char> [format]
        # Calls ord.echo with LC_CTYPE set to C for the duration of the call,
        # with the intent that <char> is decoded as a single byte.
        # NOTE(review): an LC_ALL already present in the environment would
        # take precedence over LC_CTYPE - confirm callers do not rely on it.
        function ord.eascii.echo {
                LC_CTYPE=C ord.echo "$@"
        }

        ###########################################################################
        ## chr family
        ###########################################################################
        # chr.utf8   <Return Variable Name> <Integer to convert>
        # chr.eascii <Return Variable Name> <Integer to convert>
        # chr        <Return Variable Name> <Integer to convert>
        # chr.oct    <Return Variable Name> <Octal number to convert>
        # chr.hex    <Return Variable Name> <Hex number to convert>
        # chr.utf8.echo                  <Integer to convert>
        # chr.eascii.echo                <Integer to convert>
        # chr.echo                       <Integer to convert>
        # chr.oct.echo                   <Octal number to convert>
        # chr.hex.echo                   <Hex number to convert>
        #
        # Description:
        # converts decimal value to character representation and stores
        # it in the Variable specified
        #
        #       chr                     Tries to guess output format
        #       chr.utf8                forces UTF8 encoding
        #       chr.eascii              forces eascii encoding
        #       chr.echo                prints to stdout
        #
        # chr.utf8 <dest-var> <integer>
        # Stores in <dest-var> the character produced by expanding the printf
        # escape \UXXXXXXXX for <integer>.
        function chr.utf8 {
                : "${1?Missing Dest Variable}" "${2?Missing Value}"
                # Build the escape text (e.g. \U00000041) directly in the
                # destination, so no local name can shadow the caller's variable.
                printf -v "$1" '\\U%08x' "$2"
                # Reuse that text as a printf format so the escape gets expanded.
                printf -v "$1" "${!1}"
        }
        # chr.eascii <dest-var> <integer>
        # Stores in <dest-var> the single byte whose value is <integer>.
        # Returns 1 without touching <dest-var> when <integer> is outside
        # 0..0xff.
        function chr.eascii {
                : "${1?Missing Dest Variable}" "${2?Missing Value}"
                # Arithmetic evaluation is used because [ ... -lt ] rejects
                # hex literals.
                (( $2 >= 0 && $2 < 0x100 )) || return 1
                # Build the escape text (e.g. \x41) directly in the destination,
                # so no local name can shadow the caller's variable.
                printf -v "$1" '\\x%02x' "$2"
                # Reuse that text as a printf format so the escape gets expanded.
                printf -v "$1" "${!1}"
        }
        # chr <dest-var> <integer>
        # Stores the character for <integer> in <dest-var>, choosing the
        # encoder from the value and the locale variables: chr.eascii below
        # 0x80, and for 0x80-0xff when the locale is C/POSIX; chr.utf8
        # otherwise.
        function chr {
                : "${1?Missing Dest Variable}" "${2?Missing Value}"
                # Arithmetic evaluation is used because [ ... -lt ] rejects
                # hex literals.
                if (( $2 < 0x80 )); then
                        chr.eascii "$@"
                        return
                fi
                # The effective locale follows the precedence
                # LC_ALL > LC_CTYPE > LANG, falling back to C when none is set.
                if (( $2 < 0x100 )); then
                        case "${LC_ALL:-${LC_CTYPE:-${LANG:-C}}}" in
                                C | POSIX)
                                        chr.eascii "$@"
                                        return
                                        ;;
                        esac
                fi
                chr.utf8 "$@"
        }
        # chr.oct <dest-var> <octal-digits>
        # Prefixes the digits with 0 so they are read as an octal constant,
        # then hands the destination name and that constant to chr.
        function chr.oct {
                chr "$1" "0$2"
        }
        # chr.hex <dest-var> <hex-digits>
        # Prefixes the digits with 0x so they are read as a hex constant,
        # then hands the destination name and that constant to chr.
        function chr.hex {
                chr "$1" "0x$2"
        }
        # chr.utf8.echo <integer>
        # Prints to stdout the character produced by expanding the printf
        # escape \UXXXXXXXX for <integer>.
        function chr.utf8.echo {
                local hex
                # The variable name is passed to -v literally; expanding an
                # empty local there would make printf treat the format as the
                # variable name.
                printf -v hex '%08x' "${1?Missing Value}"
                printf "\\U${hex}"
        }
        # chr.eascii.echo <integer>
        # Prints to stdout the single byte whose value is <integer>.
        # Returns 1 and prints nothing when <integer> is outside 0..0xff.
        function chr.eascii.echo {
                local hex
                : "${1?Missing Value}"
                (( $1 >= 0 && $1 < 0x100 )) || return 1
                # The variable name is passed to -v literally; expanding an
                # empty local there would make printf treat the format as the
                # variable name.
                printf -v hex '%02x' "$1"
                printf "\\x${hex}"
        }
        # chr.echo <integer>
        # Prints the character for <integer> to stdout, choosing the encoder
        # from the value and the locale variables: chr.eascii.echo below 0x80,
        # and for 0x80-0xff when the locale is C/POSIX; chr.utf8.echo
        # otherwise.
        function chr.echo {
                : "${1?Missing Value}"
                # Arithmetic evaluation is used because [ ... -lt ] rejects
                # hex literals.
                if (( $1 < 0x80 )); then
                        chr.eascii.echo "$@"
                        return
                fi
                # The effective locale follows the precedence
                # LC_ALL > LC_CTYPE > LANG, falling back to C when none is set.
                if (( $1 < 0x100 )); then
                        case "${LC_ALL:-${LC_CTYPE:-${LANG:-C}}}" in
                                C | POSIX)
                                        chr.eascii.echo "$@"
                                        return
                                        ;;
                        esac
                fi
                chr.utf8.echo "$@"
        }
        # chr.oct.echo <octal-digits>
        # Prefixes the digits with 0 so they are read as an octal constant and
        # passes that constant to chr.echo as its value argument.
        function chr.oct.echo {
                chr.echo "0${1?Missing Value}"
        }
        # chr.hex.echo <hex-digits>
        # Prefixes the digits with 0x so they are read as a hex constant and
        # passes that constant to chr.echo as its value argument.
        function chr.hex.echo {
                chr.echo "0x${1?Missing Value}"
        }
   # Round-trip examples. The substitutions are quoted so that a result
   # such as a space or "*" is passed on as one literal argument.
   chr.echo "$(ord.echo A)"    # -> A
   ord.echo "$(chr.echo 65)"   # -> 65

The ord function above is quite tricky.

  • Tricky? Rather, it's using a feature that I can't find documented anywhere -- putting a single quote in front of an integer. Neat effect, but how on earth did you find out about it? Source diving? -- GreyCat

    • It conforms to the Single Unix Specification: "If the leading character is a single-quote or double-quote, the value shall be the numeric value in the underlying codeset of the character following the single-quote or double-quote." (see printf() for details) -- mjf

BashFAQ/071 (last edited 2021-02-08 16:03:51 by GreyCat)