Class: Unisec::Properties

Inherits:
Object
  • Object
show all
Defined in:
lib/unisec/properties.rb

Overview

Manipulate Unicode properties

Class Method Summary collapse

Class Method Details

.char(chr) ⇒ Hash

Returns all properties of a given Unicode character (code point)

Examples:

Unisec::Properties.char('é')
# =>
# {:age=>"1.1",
# … }

Parameters:

  • chr (String)

    Unicode code point (as character / string)

Returns:

  • (Hash)

    All properties of the given code point



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/unisec/properties.rb', line 66

# Gather the properties of a character into a single Hash.
#
# The CLDR lookup uses the first element returned by
# TwitterCldr::Utils::CodePoints.from_string, while the case and normalization
# entries are computed on the whole of +chr+.
#
# @param chr [String] Unicode code point (as character / string)
# @return [Hash] age, block, category, subcategory, codepoint, name, script,
#   case mappings, normalization forms and the remaining properties
def self.char(chr)
  first_cp = TwitterCldr::Utils::CodePoints.from_string(chr).first
  code_point = TwitterCldr::Shared::CodePoint.get(first_cp)
  property_set = code_point.properties

  # Properties exposed under their own key are dropped from the catch-all hash.
  promoted = %w[Age Block General_Category Script]
  leftovers = property_set.properties_hash.dup
  leftovers.delete_if { |prop_name, _values| promoted.include?(prop_name) }

  # The first mapped alias is reported as the sub-category, the second as the category.
  subcategory, category = property_set.general_category.map do |short_alias|
    TwitterCldr::Shared::PropertyValueAliases.long_alias_for('gc', short_alias)
  end

  # Result key => method invoked on the localized string.
  cldr_case_ops = { lowercase: :downcase, uppercase: :upcase, titlecase: :titlecase, casefold: :casefold }
  forms = %i[nfkd nfkc nfd nfc]

  {
    age: property_set.age.join,
    block: property_set.block.join,
    category: category,
    subcategory: subcategory,
    codepoint: Properties.char2codepoint(chr),
    name: code_point.name,
    script: property_set.script.join,
    case: {
      ruby: { lowercase: chr.downcase, uppercase: chr.upcase },
      twitter: cldr_case_ops.map { |label, op| [label, chr.localize.public_send(op).to_s] }.to_h
    },
    normalization: {
      ruby: forms.map { |form| [form, chr.unicode_normalize(form)] }.to_h,
      # TwitterCldr takes the form name as an upper-case symbol (:NFKD, ...).
      twitter: forms.map { |form| [form, chr.localize.normalize(using: form.upcase).to_s] }.to_h
    },
    other_properties: leftovers
  }
end

.char2codepoint(chr) ⇒ String

Display the code point in Unicode format for a given character (code point as string)

Examples:

Unisec::Properties.char2codepoint('💎') # => "U+1F48E"

Parameters:

  • chr (String)

    Unicode code point (as character / string)

Returns:

  • (String)

    code point in Unicode format



160
161
162
# File 'lib/unisec/properties.rb', line 160

# Format the first code point of a string in the standard "U+XXXX" notation.
#
# @param chr [String] Unicode code point (as character / string)
# @return [String] code point in Unicode format
def self.char2codepoint(chr)
  first_cp = chr.codepoints.first
  Properties.deccp2stdhexcp(first_cp)
end

.char_display(chr, extended: false) ⇒ Object

Display a CLI-friendly output listing all properties corresponding to character (code point)

Parameters:

  • chr (String)

    Unicode code point (as character / string)

  • extended (Boolean) (defaults to: false)

    By default, it will only show common properties, with extended set to true it will show all of them.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/unisec/properties.rb', line 117

# Print a CLI-friendly report of the properties of a character.
#
# Case mappings may expand to several code points (e.g. "ß" upcases to "SS"),
# so every code point of each mapping is listed rather than only the first one.
#
# @param chr [String] Unicode code point (as character / string)
# @param extended [Boolean] when true, the entries of +:other_properties+ are
#   printed as well
# @return [nil]
def self.char_display(chr, extended: false)
  data = Properties.char(chr)
  display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
  # Renders a string followed by the list of all its code points.
  with_codepoints = ->(str) { "#{str} (#{Properties.chars2codepoints(str)})" }

  display.call('Name:', data[:name])
  display.call('Code Point:', data[:codepoint])
  puts
  display.call('Block:', data[:block])
  display.call('Category:', data[:category])
  display.call('Sub-Category:', data[:subcategory])
  display.call('Script:', data[:script])
  display.call('Since (age):', "Version #{data[:age]}")
  puts
  {
    'Uppercase:' => :uppercase,
    'Lowercase:' => :lowercase,
    'Titlecase:' => :titlecase,
    'Casefold:' => :casefold
  }.each do |label, mapping|
    display.call(label, with_codepoints.call(data.dig(:case, :twitter, mapping)))
  end
  puts
  %i[nfkd nfkc nfd nfc].each do |form|
    normalized = data.dig(:normalization, :twitter, form)
    display.call("Normalization #{form.to_s.upcase}:", with_codepoints.call(normalized))
  end
  if extended
    puts
    data[:other_properties].each do |prop_name, values|
      # A property may carry no value at all, hence the safe navigation.
      display.call(prop_name, values&.join)
    end
  end
  nil
end

.chars2codepoints(chrs) ⇒ String

Display the code points in Unicode format for the given characters (code points as string)

Examples:

Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"

Parameters:

  • chrs (String)

    Unicode code points (as characters / string)

Returns:

  • (String)

    code points in Unicode format



170
171
172
173
174
175
176
# File 'lib/unisec/properties.rb', line 170

# Format every character of a string as "U+XXXX", separated by single spaces.
#
# @param chrs [String] Unicode code points (as characters / string)
# @return [String] code points in Unicode format
def self.chars2codepoints(chrs)
  chrs.each_char.map { |single| Properties.char2codepoint(single) }.join(' ')
end

.codepoints(prop) ⇒ Array<Hash>

List all code points for a given property

Examples:

Unisec::Properties.codepoints('Quotation_Mark')
# =>
# [{:char=>"\"", :codepoint=>34, :name=>"QUOTATION MARK"},
#  {:char=>"'", :codepoint=>39, :name=>"APOSTROPHE"},
#  … ]

Parameters:

  • prop (String)

    the property name

Returns:

  • (Array<Hash>)

    Array of code points ({char: String, codepoint: Integer, name: String})



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/unisec/properties.rb', line 26

# Enumerate the code points that carry a given property.
#
# @param prop [String] the property name
# @return [Array<Hash>] one Hash per code point, with the keys +:char+,
#   +:codepoint+ and +:name+
def self.codepoints(prop)
  database = TwitterCldr::Shared::CodePoint
  matching_ranges = database.properties.code_points_for_property(prop).ranges
  matching_ranges.flat_map do |range|
    range.map do |number|
      entry = database.get(number)
      {
        char: TwitterCldr::Utils::CodePoints.to_string([entry.code_point]),
        codepoint: entry.code_point,
        name: entry.name
      }
    end
  end
end

.codepoints_display(prop) ⇒ Object

Display a CLI-friendly output listing all code points corresponding to a property.

Examples:

Unisec::Properties.codepoints_display('Quotation_Mark')
# =>
# U+0022      "    QUOTATION MARK
# U+0027      '    APOSTROPHE
# …


50
51
52
53
54
55
56
# File 'lib/unisec/properties.rb', line 50

# Print one line per code point carrying the given property:
# the "U+XXXX" notation, the character itself and its name.
#
# @param prop [String] the property name
# @return [nil]
def self.codepoints_display(prop)
  Properties.codepoints(prop).each do |entry|
    hex_column = Properties.deccp2stdhexcp(entry[:codepoint]).ljust(7)
    char_column = entry[:char].ljust(4)
    puts "#{hex_column} #{char_column} #{entry[:name]}"
  end
  nil
end

.deccp2stdhexcp(int_cp) ⇒ String

Convert from decimal code point to standardized format hexadecimal code point

Examples:

Unisec::Properties.deccp2stdhexcp(128640) # => "U+1F680"

Parameters:

  • int_cp (Integer)

    Code point in decimal format

Returns:

  • (String)

    code point in Unicode format



183
184
185
# File 'lib/unisec/properties.rb', line 183

# Render a decimal code point in the standard "U+XXXX" notation
# (upper-case hexadecimal, zero-padded to at least four digits).
#
# @param int_cp [Integer] Code point in decimal format
# @return [String] code point in Unicode format
def self.deccp2stdhexcp(int_cp)
  hex_digits = format('%.4x', int_cp)
  'U+' + hex_digits.upcase
end

.list ⇒ Array<String>

List Unicode property names

Examples:

Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]

Returns:

  • (Array<String>)

    property names



13
14
15
# File 'lib/unisec/properties.rb', line 13

# Names of the Unicode properties known to the TwitterCldr property database.
#
# @return [Array<String>] property names
def self.list
  property_database = TwitterCldr::Shared::CodePoint.properties
  property_database.property_names
end