An acronym capitalization checker with regex

camdez · August 8, 2021, 6:45pm

I decided to take a stab an implementing this since it seemed like a fun little challenge:

(ns camdez.re-opt
  (:require [clojure.walk :as walk]
            [clojure.string :as str]))

;; Convert strings to a graph (nested maps) representing common prefix
;; strings:
;;
;; A -> B -> C
;;   -> O -> U -> T
;;   -> P -> P -> L -> E
;;
;; Then collapse tails into strings and branches into regex
;; alternations.
(defn- re-literal-opts-str [opts]
  (->> opts
       (reduce (fn [acc s]
                 (assoc-in acc (butlast s) {(last s) nil}))
               {})
       (walk/postwalk (fn [x]
                        (cond
                          (not (map? x))  x
                          (= 1 (count x)) (apply str (first x))
                          :else           (str "(?:" (str/join "|" (map (fn [[k v]] (str k v)) x)) ")"))))))

(defn re-literal-opts
  "Builds an optimized regular expression for matching any of the string
  literals in `opts`."
  [opts]
  (re-pattern (re-literal-opts-str opts)))

(defn re-literal-word-opts
  "Builds an optimized regular expression for matching any of the string
  literals in `opts` at word boundaries."
  [opts]
  (re-pattern (str "\\b(?:" (re-literal-opts-str opts) ")\\b")))

;;; Examples

(def sample-opts ["ABC" "ABOUT" "APPLE" "BOTTLE"])

(re-literal-opts-str sample-opts)
;; => "(?:A(?:B(?:C|OUT)|PPLE)|BOTTLE)"

(def p1 (re-literal-opts sample-opts))

(re-seq p1 "WHAT ABOUT BOTTLES? A BOTTLE?")
;; => ("ABOUT" "BOTTLE" "BOTTLE")

(def p2 (re-literal-word-opts sample-opts))

(re-seq p2 "WHAT ABOUT BOTTLES? A BOTTLE?")
;; => ("ABOUT" "BOTTLE")

Pretty happy with how it turned out. Only about 20 minutes of work given the magic of Clojure.