From 686bb6ad3392a7cf9396889bb636b79ab7ab7d41 Mon Sep 17 00:00:00 2001
From: Rasmus Andersson <rasmus@notion.se>
Date: Sun, 24 Sep 2017 17:54:57 -0700
Subject: Improvements to misc/rmglyph.py

---
 misc/rmglyph.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 153 insertions(+), 30 deletions(-)

(limited to 'misc')

diff --git a/misc/rmglyph.py b/misc/rmglyph.py
index ffc49c0b0..f7b0489c4 100755
--- a/misc/rmglyph.py
+++ b/misc/rmglyph.py
@@ -20,6 +20,21 @@ def readLines(filename):
     return f.read().strip().splitlines()
 
 
+def loadAGL(filename):  # -> { 2126: 'Omega', ... }
+  m = {}
+  with open(filename, 'r') as f:
+    for line in f:
+      # Omega;2126
+      # dalethatafpatah;05D3 05B2   # higher-level combinations; ignored
+      line = line.strip()
+      if len(line) > 0 and line[0] != '#':
+        name, uc = tuple([c.strip() for c in line.split(';')])
+        if uc.find(' ') == -1:
+          # it's a 1:1 mapping
+          m[int(uc, 16)] = name
+  return m
+
+
 def decomposeComponentInstances(font, glyph, componentsToDecompose):
   """Moves the components of a glyph to its outline."""
   if len(glyph.components):
@@ -46,16 +61,16 @@ def deepCopyContours(font, parent, component, offset, scale, componentsToDecompo
     parent.appendContour(contour)
 
 
-def addGlyphsForCP(cp, ucmap, glyphnames):
+def addGlyphsForCPFont(cp, ucmap, glyphnames):
   if cp in ucmap:
     for name in ucmap[cp]:
-      glyphnames.append(name)
+      glyphnames.add(name)
   # else:
   #   print('no glyph for U+%04X' % cp)
 
 
-def getGlyphNamesFromArgs(font, ucmap, glyphs):
-  glyphnames = []
+def getGlyphNamesFont(font, ucmap, glyphs):
+  glyphnames = set()
   for s in glyphs:
     if len(s) > 2 and s[:2] == 'U+':
       p = s.find('-')
@@ -64,13 +79,44 @@ def getGlyphNamesFromArgs(font, ucmap, glyphs):
         cpStart = int(s[2:p], 16)
         cpEnd = int(s[p+1:], 16)
         for cp in range(cpStart, cpEnd):
-          addGlyphsForCP(cp, ucmap, glyphnames)
+          addGlyphsForCPFont(cp, ucmap, glyphnames)
       else:
         # single code point e.g. "U+1D0A"
         cp = int(s[2:], 16)
-        addGlyphsForCP(cp, ucmap, glyphnames)
-    else:
-      glyphnames.append(s)
+        addGlyphsForCPFont(cp, ucmap, glyphnames)
+    elif s in font:
+      glyphnames.add(s)
+  return glyphnames
+
+
+def addGlyphsForCPComps(cp, comps, agl, glyphnames):
+  uniName = 'uni%04X' % cp
+  symbolicName = agl.get(cp)
+  if uniName in comps:
+    glyphnames.add(uniName)
+  if symbolicName in comps:
+    glyphnames.add(symbolicName)
+
+
+def getGlyphNamesComps(comps, agl, glyphs):
+  # comps: { glyphName => (baseName, accentNames, offset) ... }
+  # agl:   { 2126: 'Omega' ... }
+  glyphnames = set()
+  for s in glyphs:
+    if len(s) > 2 and s[:2] == 'U+':
+      p = s.find('-')
+      if p != -1:
+        # range, e.g. "U+1D0A-1DBC"
+        cpStart = int(s[2:p], 16)
+        cpEnd = int(s[p+1:], 16)
+        for cp in range(cpStart, cpEnd):
+          addGlyphsForCPComps(cp, comps, agl, glyphnames)
+      else:
+        # single code point e.g. "U+1D0A"
+        cp = int(s[2:], 16)
+        addGlyphsForCPComps(cp, comps, agl, glyphnames)
+    elif s in comps:
+      glyphnames.add(s)
   return glyphnames
 
 
@@ -138,6 +184,17 @@ def fmtGlyphComposition(glyphName, baseName, accentNames, offset):
   return s
 
 
+def loadGlyphCompositions(filename):  # { glyphName => (baseName, accentNames, offset) }
+  compositions = OrderedDict()
+  with open(filename, 'r') as f:
+    for line in f:
+      line = line.strip()
+      if len(line) > 0 and line[0] != '#':
+        glyphName, baseName, accentNames, offset = parseGlyphComposition(line)
+        compositions[glyphName] = (baseName, accentNames, offset)
+  return compositions
+
+
 def updateDiacriticsFile(filename, rmnames):
   lines = []
   didChange = False
@@ -150,14 +207,14 @@ def updateDiacriticsFile(filename, rmnames):
       glyphName, baseName, accentNames, offset = parseGlyphComposition(line)
 
       skipLine = False
-      if baseName in rmnames:
+      if baseName in rmnames or glyphName in rmnames:
         skipLine = True
       else:
-        for names in accentNames:
-          for name in names:
-            if name in rmnames:
-              skipLine = True
-              break
+        for accent in accentNames:
+          name = accent[0]
+          if name in rmnames:
+            skipLine = True
+            break
 
       if not skipLine:
         lines.append(line)
@@ -169,7 +226,8 @@ def updateDiacriticsFile(filename, rmnames):
     print('Writing', filename)
     if not dryRun:
       with open(filename, 'w') as f:
-        f.write('\n'.join(lines))
+        for line in lines:
+          f.write(line + '\n')
 
 
 def configFindResFile(config, basedir, name):
@@ -182,9 +240,59 @@ def configFindResFile(config, basedir, name):
   return fn
 
 
+includeRe = re.compile(r'^include\(([^\)]+)\);\s*$')
+tokenSepRe = re.compile(r'([\@A-Za-z0-9_\.]+|[=\-\[\]\(\)\{\};<>\'])')
+spaceRe = re.compile(r'[ \t]+')
+
+
+def loadFeaturesFile(filepath, followIncludes=True):
+  print('read', filepath)
+  lines = []
+  with open(filepath, 'r') as f:
+    for line in f:
+      m = includeRe.match(line)
+      if m is not None:
+        if followIncludes:
+          includedFilename = m.group(1)
+          includedPath = os.path.normpath(os.path.join(os.path.dirname(filepath), includedFilename))
+          lines = lines + loadFeaturesFile(includedPath, followIncludes)
+      else:
+        lines.append(line)
+  return lines
+
 
+def collapseSpace(s):
+  lm = len(s) - len(s.lstrip(' \t'))
+  return s[:lm] + spaceRe.sub(' ', s[lm:])
 
-def main(argv=sys.argv):
+def updateFeaturesFile(filename, rmnames):
+  # this is a VERY crude approach that simply tokenizes the input and filters
+  # out strings that seem to be names but are not found in glyphnames.
+
+  lines = []
+  didChange = False
+
+  for line in loadFeaturesFile(filename, followIncludes=False):
+    line = line.rstrip('\r\n ')
+    tokens = tokenSepRe.split(line)
+    tokens2 = [t for t in tokens if t not in rmnames]
+    if len(tokens2) != len(tokens):
+      line = collapseSpace(''.join(tokens2))
+      didChange = True
+    lines.append(line)
+
+  if didChange:
+    print('Write', filename)
+    if not dryRun:
+      with open(filename, 'w') as f:
+        for line in lines:
+          f.write(line + '\n')
+
+  return didChange
+
+
+
+def main(argv=None):
   argparser = ArgumentParser(
     description='Remove glyphs from all UFOs in src dir')
 
@@ -209,7 +317,7 @@ def main(argv=sys.argv):
          'a Unicode code point formatted as "U+<CP>", '+
          'or a Unicode code point range formatted as "U+<CP>-<CP>"')
 
-  args = argparser.parse_args()
+  args = argparser.parse_args(argv)
   global dryRun
   dryRun = args.dryRun
   BASEDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
@@ -244,8 +352,23 @@ def main(argv=sys.argv):
     print('No UFOs found in', srcDir, file=sys.stderr)
     sys.exit(1)
 
-  rmnamesUnion = set()
+  # load fontbuild config
+  config = RawConfigParser(dict_type=OrderedDict)
+  configFilename = os.path.join(srcDir, 'fontbuild.cfg')
+  config.read(configFilename)
+  glyphOrderFile = configFindResFile(config, srcDir, 'glyphorder')
+  diacriticsFile = configFindResFile(config, srcDir, 'diacriticfile')
+  featuresFile = os.path.join(srcDir, 'features.fea')
+
+  # load AGL and diacritics
+  agl = loadAGL(os.path.join(srcDir, 'glyphlist.txt')) # { 2126: 'Omega', ... }
+  comps = loadGlyphCompositions(diacriticsFile)
+    # { glyphName => (baseName, accentNames, offset) }
+
+  # find glyphnames to remove that are composed (removal happens later)
+  rmnamesUnion = getGlyphNamesComps(comps, agl, args.glyphs)
 
+  # find glyphnames to remove from UFOs (and remove them) 
   for fontPath in fontPaths:
     relFontPath = os.path.relpath(fontPath, BASEDIR)
     print('Loading glyph data for %s...' % relFontPath)
@@ -253,7 +376,7 @@ def main(argv=sys.argv):
     ucmap = font.getCharacterMapping()  # { 2126: [ 'Omega', ...], ...}
     cnmap = font.getReverseComponentMapping()  # { 'A' : ['Aacute', 'Aring'], 'acute' : ['Aacute'] ... }
 
-    glyphnames = set(getGlyphNamesFromArgs(font, ucmap, args.glyphs))
+    glyphnames = getGlyphNamesFont(font, ucmap, args.glyphs)
 
     if len(glyphnames) == 0:
       print('None of the glyphs requested exist in', relFontPath, file=sys.stderr)
@@ -348,29 +471,29 @@ def main(argv=sys.argv):
 
 
   # fontbuild config
-  config = RawConfigParser(dict_type=OrderedDict)
-  configFilename = os.path.join(srcDir, 'fontbuild.cfg')
-  config.read(configFilename)
-  glyphOrderFile = configFindResFile(config, srcDir, 'glyphorder')
-  diacriticsFile = configFindResFile(config, srcDir, 'diacriticfile')
-
   updateDiacriticsFile(diacriticsFile, rmnamesUnion)
   updateConfigFile(config, configFilename, rmnamesUnion)
+  featuresChanged = updateFeaturesFile(featuresFile, rmnamesUnion)
 
 
   print('\n————————————————————————————————————————————————————\n'+
         'Removed %d glyphs:\n  %s' % (
           len(rmnamesUnion), '\n  '.join(sorted(rmnamesUnion))))
 
-  print('\n————————————————————————————————————————————————————\n\n'+
-        'You now need to manually remove any occurances of these glyphs in\n'+
-        ' src/features.fea\n'+
-        ' %s/features.fea\n' % '/features.fea\n '.join(fontPaths))
+  print('\n————————————————————————————————————————————————————\n')
+
+  if featuresChanged:
+    print('You need to manually edit features.\n'+
+          '- git diff src/features.fea\n'+
+          '- $EDITOR %s/features.fea\n' % '/features.fea\n- $EDITOR '.join(fontPaths))
 
-  print(('You should also re-generate %s via\n'+
+  print(('You need to re-generate %s via\n'+
          '`make src/glyphorder.txt` (or misc/misc/gen-glyphorder.py)'
         ) % glyphOrderFile)
 
+  print('\nFinally, you should build the Medium weight and make sure it all '+
+        'looks good and that no mixglyph failures occur. E.g. `make Medium -j`')
+
 
 if __name__ == '__main__':
   main()
-- 
cgit v1.2.3