prosodic.ents

   1from typing import Any
   2from .imports import *
   3
   4
   5class Entity(UserList):
   6    """
   7    Root Entity class representing a hierarchical structure in prosodic analysis.
   8
   9    This class serves as the base for various prosodic entities such as texts, stanzas,
  10    lines, words, syllables, and phonemes. It provides common functionality for
  11    managing hierarchical relationships, attributes, and data representation.
  12
  13    Attributes:
  14        child_type (str): The type of child entities this entity can contain.
  15        is_parseable (bool): Whether this entity can be parsed.
  16        index_name (str): The name used for indexing this entity type.
  17        prefix (str): A prefix used for attribute naming.
  18        list_type (type): The type of list used for storing children.
  19        cached_properties_to_clear (list): Properties to clear from cache.
  20        use_cache (bool): Whether to use caching for this entity.
  21        sep (str): Separator used when joining child texts.
  22    """
  23
  24    child_type = "Text"
  25    is_parseable = False
  26    index_name = None
  27    prefix = "ent"
  28    list_type = None
  29    cached_properties_to_clear = []
  30    use_cache = False
  31    sep = ""
  32
  33    def __init__(self, txt: str = "", children=[], parent=None, **kwargs):
  34        """
  35        Initialize an Entity object.
  36
  37        Args:
  38            txt (str): The text content of the entity.
  39            children (list): List of child entities.
  40            parent (Entity): The parent entity.
  41            **kwargs: Additional attributes to set on the entity.
  42        """
  43        self.parent = parent
  44        newchildren = []
  45        for child in children:
  46            if not isinstance(child, Entity):
  47                logger.warning(f"{child} is not an Entity")
  48                continue
  49            newchildren.append(child)
  50            # if not child.is_wordtype:   # don't do this for wordtypes since each wordtype is a single/shared python object
  51            child.parent = self
  52        children = newchildren
  53        if self.list_type is None:
  54            self.list_type = 'EntityList'
  55        from .imports import GLOBALS
  56        self.children = GLOBALS[self.list_type](children)
  57        self._attrs = kwargs
  58        self._txt = txt
  59        self._mtr = None
  60        for k, v in self._attrs.items():
  61            setattr(self, k, v)
  62
  63    def __iter__(self):
  64        """
  65        Iterate over the children of this entity.
  66
  67        Yields:
  68            Entity: The next child entity.
  69        """
  70        yield from self.children
  71
  72    def to_hash(self):
  73        """
  74        Generate a hash representation of the entity.
  75
  76        Returns:
  77            str: A hash string representing the entity's content and attributes.
  78        """
  79        return hashstr(
  80            self.txt, tuple(sorted(self._attrs.items())), self.__class__.__name__
  81        )
  82
  83    @cached_property
  84    def html(self):
  85        """
  86        Get the HTML representation of the entity.
  87
  88        Returns:
  89            str: HTML representation of the entity, if available.
  90        """
  91        if hasattr(self, "to_html"):
  92            return self.to_html()
  93
  94    @cached_property
  95    def key(self):
  96        """
  97        Generate a unique key for the entity.
  98
  99        Returns:
 100            str: A string key representing the entity's class and attributes.
 101        """
 102        attrs = {
 103            **{k: v for k, v in self.attrs.items() if v is not None},
 104            "txt": self._txt,
 105        }
 106        return f"{self.__class__.__name__}({get_attr_str(attrs)})"
 107
 108    @cached_property
 109    def hash(self):
 110        """
 111        Get a hash value for the entity.
 112
 113        Returns:
 114            str: A hash string for the entity.
 115        """
 116        return hashstr(self.key)
 117
 118    def __hash__(self):
 119        """
 120        Get the hash value for use in hash-based collections.
 121
 122        Returns:
 123            int: The hash value of the entity.
 124        """
 125        return hash(self.hash)
 126
 127    def __eq__(self, other):
 128        """
 129        Check if this entity is equal to another.
 130
 131        Args:
 132            other: The other object to compare with.
 133
 134        Returns:
 135            bool: True if the objects are the same instance, False otherwise.
 136        """
 137        return self is other
 138
    def __bool__(self):
        """
        Report the truth value of the entity.

        An entity is truthy even when it has no children, so ``if entity:``
        cannot be used as an emptiness test.

        Returns:
            bool: Always True.
        """
        return True
 147
 148    def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
 149        """
 150        Convert the entity to a JSON representation.
 151
 152        Args:
 153            fn (str, optional): Filename to save the JSON output.
 154            no_txt (bool): If True, exclude the text content.
 155            yes_txt (bool): If True, include the full text content.
 156            **kwargs: Additional key-value pairs to include in the JSON.
 157
 158        Returns:
 159            dict: A dictionary representation of the entity.
 160        """
 161        txt = (self._txt if not yes_txt else self.txt) if not no_txt else None
 162        return to_json(
 163            {
 164                "_class": self.__class__.__name__,
 165                **({"txt": txt} if txt is not None and (yes_txt or txt) else {}),
 166                "children": [kid.to_json() for kid in self.children],
 167                **kwargs,
 168            },
 169            fn=fn,
 170        )
 171
 172    def save(self, fn, **kwargs):
 173        """
 174        Save the entity to a file in JSON format.
 175
 176        Args:
 177            fn (str): The filename to save to.
 178            **kwargs: Additional arguments to pass to to_json.
 179
 180        Returns:
 181            The result of to_json with the given filename.
 182        """
 183        return self.to_json(fn=fn, **kwargs)
 184
 185    def render(self, as_str=False):
 186        """
 187        Render the entity as HTML.
 188
 189        Args:
 190            as_str (bool): If True, return the result as a string.
 191
 192        Returns:
 193            str or HTML: The rendered HTML representation of the entity.
 194        """
 195        return self.to_html(as_str=as_str)
 196
 197    @staticmethod
 198    def from_json(json_d):
 199        """
 200        Create an Entity object from a JSON dictionary.
 201
 202        Args:
 203            json_d (dict): A dictionary containing the entity data.
 204
 205        Returns:
 206            Entity: An instance of the appropriate Entity subclass.
 207        """
 208        from .imports import GLOBALS, CHILDCLASSES
 209
 210        classname = json_d["_class"]
 211        classx = GLOBALS[classname]
 212        childx = CHILDCLASSES.get(classname)
 213        children = json_d.get("children", [])
 214        inpd = {k: v for k, v in json_d.items() if k not in {"children", "_class"}}
 215        if children and childx:
 216            children = [childx.from_json(d) for d in json_d["children"]]
 217        return classx(children=tuple(children), **inpd)
 218
 219    @property
 220    def attrs(self):
 221        """
 222        Get the attributes of the entity.
 223
 224        Returns:
 225            dict: A dictionary of the entity's attributes.
 226        """
 227        odx = {"num": self.num}
 228        if (
 229            self.__class__.__name__
 230            not in {"Text", "Stanza", "MeterLine", "MeterText", "Meter"}
 231            and self.txt
 232        ):
 233            odx["txt"] = self.txt
 234        return {**odx, **self._attrs}
 235
 236    @cached_property
 237    def prefix_attrs(self, with_parent=True):
 238        """
 239        Get the attributes of the entity with a prefix.
 240
 241        Args:
 242            with_parent (bool): If True, include parent attributes.
 243
 244        Returns:
 245            dict: A dictionary of the entity's attributes with a prefix.
 246        """
 247
 248        def getkey(k):
 249            o = f"{self.prefix}_{k}"
 250            o = DF_COLS_RENAME.get(o, o)
 251            return o
 252
 253        odx = {getkey(k): v for k, v in self.attrs.items() if v is not None}
 254        if with_parent and self.parent:
 255            return {**self.parent.prefix_attrs, **odx}
 256        return odx
 257
 258    @cached_property
 259    def txt(self):
 260        """
 261        Get the text content of the entity.
 262
 263        Returns:
 264            str: The text content of the entity.
 265        """
 266        if self._txt:
 267            txt = self._txt
 268        elif self.children:
 269            txt = self.child_class.sep.join(child.txt for child in self.children)
 270        else:
 271            txt = ""
 272        return clean_text(txt)
 273
 274    @cached_property
 275    def data(self):
 276        """
 277        Get the data associated with the entity.
 278
 279        Returns:
 280            list: The list of child entities.
 281        """
 282        return self.children
 283
 284    @cached_property
 285    def l(self):
 286        """
 287        Get the list of child entities.
 288
 289        Returns:
 290            list: The list of child entities.
 291        """
 292        return self.children
 293
 294    def clear_cached_properties(self):
 295        """
 296        Clear cached properties to free up memory.
 297        """
 298        for prop in self.cached_properties_to_clear:
 299            if prop in self.__dict__:
 300                del self.__dict__[prop]
 301            # elif hasattr(self,prop):
 302            #     try:
 303            #         func = getattr(self,prop)
 304            #         func.clear_cache()
 305            #     except AttributeError:
 306            #         pass
 307
 308    def inspect(self, indent=0, maxlines=None, incl_phons=False):
 309        """
 310        Inspect the entity and its children.
 311
 312        Args:
 313            indent (int): The indentation level for the output.
 314            maxlines (int): The maximum number of lines to display.
 315            incl_phons (bool): If True, include phoneme information.
 316        """
 317        attrstr = get_attr_str(self.attrs)
 318        myself = f"{self.__class__.__name__}({attrstr})"
 319        if indent:
 320            myself = textwrap.indent(myself, "|" + (" " * (indent - 1)))
 321        lines = [myself]
 322        for child in self.children:
 323            if isinstance(child, Entity) and (
 324                incl_phons or not child.__class__.__name__.startswith("Phoneme")
 325            ):
 326                lines.append(
 327                    child.inspect(indent=indent + 4, incl_phons=incl_phons).replace(
 328                        "PhonemeClass", "Phoneme"
 329                    )
 330                )
 331        # self.__class__.__name__ in {'Text', 'Stanza', 'Line'}
 332        dblbreakfor = False
 333        breakstr = "\n|\n" if dblbreakfor else "\n"
 334        o = breakstr.join(lines)
 335        if not indent:
 336            if maxlines:
 337                o = "\n".join(o.split("\n")[:maxlines])
 338            print(o)
 339        else:
 340            return o
 341
 342    def _repr_html_(self, df=None):
 343        """
 344        Get the HTML representation of the entity.
 345
 346        Args:
 347            df (DataFrame): An optional DataFrame to use for rendering.
 348
 349        Returns:
 350            str: The HTML representation of the entity.
 351        """
 352
 353        def blank(x):
 354            if x in {None, np.nan}:
 355                return ""
 356            return x
 357
 358        return (self.df if df is None else df).applymap(blank)._repr_html_()
 359
 360    def __repr__(self, attrs=None, bad_keys=None):
 361        """
 362        Get a string representation of the entity.
 363
 364        Args:
 365            attrs (dict): An optional dictionary of attributes to use.
 366            bad_keys (list): An optional list of keys to exclude.
 367
 368        Returns:
 369            str: A string representation of the entity.
 370        """
 371        d = {
 372            k: v
 373            for k, v in (
 374                attrs
 375                if attrs is not None
 376                else (self.attrs if self.attrs is not None else self._attrs)
 377            ).items()
 378        }
 379        return f"{self.__class__.__name__}({get_attr_str(d, bad_keys=bad_keys)})"
 380
 381    @cached_property
 382    def ld(self):
 383        """
 384        Get a list of dictionaries representing the entity and its children.
 385
 386        Returns:
 387            list: A list of dictionaries representing the entity and its children.
 388        """
 389        return self.get_ld()
 390
 391    @cached_property
 392    def child_class(self):
 393        """
 394        Get the class of the child entities.
 395
 396        Returns:
 397            type: The class of the child entities.
 398        """
 399        from .imports import GLOBALS
 400
 401        return GLOBALS.get(self.child_type)
 402
 403    def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
 404        """
 405        Get a list of dictionaries representing the entity and its children.
 406
 407        Args:
 408            incl_phons (bool): If True, include phoneme information.
 409            incl_sylls (bool): If True, include syllable information.
 410            multiple_wordforms (bool): If True, include multiple word forms.
 411
 412        Returns:
 413            list: A list of dictionaries representing the entity and its children.
 414        """
 415        if not incl_sylls and self.child_type == "Syllable":
 416            return [{**self.prefix_attrs}]
 417        if not incl_phons and self.child_type == "Phoneme":
 418            return [{**self.prefix_attrs}]
 419        good_children = [c for c in self.children if isinstance(c, Entity)]
 420        # logger.debug(f'good children of {type(self)} -> {good_children}')
 421        if not multiple_wordforms and self.child_type == "WordForm" and good_children:
 422            good_children = good_children[:1]
 423            # logger.debug(f'good children now {good_children}')
 424        if good_children:
 425            return [
 426                {**self.prefix_attrs, **child.prefix_attrs, **grandchild_d}
 427                for child in good_children
 428                for grandchild_d in child.get_ld(
 429                    incl_phons=incl_phons,
 430                    incl_sylls=incl_sylls,
 431                    multiple_wordforms=multiple_wordforms,
 432                )
 433            ]
 434        else:
 435            return [{**self.prefix_attrs}]
 436
 437    def get_df(self, **kwargs):
 438        """
 439        Get a DataFrame representation of the entity and its children.
 440
 441        Args:
 442            **kwargs: Additional arguments to pass to get_ld.
 443
 444        Returns:
 445            DataFrame: A DataFrame representation of the entity and its children.
 446        """
 447        odf = pd.DataFrame(self.get_ld(**kwargs))
 448        for c in DF_BADCOLS:
 449            if c in set(odf.columns):
 450                odf = odf.drop(c, axis=1)
 451        for c in odf:
 452            if c.endswith("_num"):
 453                odf[c] = odf[c].fillna(0).apply(int)
 454            else:
 455                odf[c] = odf[c].fillna("")
 456        odf = setindex(odf, DF_INDEX)
 457
 458        def unbool(x):
 459            if x is True:
 460                return 1
 461            if x is False:
 462                return 0
 463            if x is None:
 464                return 0
 465            return x
 466
 467        odf = odf.applymap(unbool)
 468        return odf
 469
 470    @cached_property
 471    def df(self):
 472        """
 473        Get a DataFrame representation of the entity and its children.
 474
 475        Returns:
 476            DataFrame: A DataFrame representation of the entity and its children.
 477        """
 478        return self.get_df()
 479
 480    def __getattr__(self, attr):
 481        """
 482        Get an attribute of the entity by name.
 483
 484        Args:
 485            attr (str): The name of the attribute.
 486
 487        Returns:
 488            Any: The value of the attribute.
 489        """
 490        objs = {
 491            "stanza": "stanzas",
 492            "line": "lines",
 493            "word": "wordtokens",
 494            "wordtoken": "wordtokens",
 495            "wordtype": "wordtypes",
 496            "wordform": "wordforms",
 497            "syllable": "syllables",
 498            "phoneme": "phonemes",
 499        }
 500        if attr[-1].isdigit():
 501            for pref, lname in objs.items():
 502                if attr.startswith(pref) and attr[len(pref) :].isdigit():
 503                    num = int(attr[len(pref) :])
 504                    try:
 505                        return getattr(self, lname)[num - 1]
 506                    except IndexError:
 507                        logger.warning(f"no {pref} at that number")
 508                        return
 509
 510    def get_parent(self, parent_type=None):
 511        """
 512        Get the parent entity of a specific type.
 513
 514        Args:
 515            parent_type (str): The type of parent entity to find.
 516
 517        Returns:
 518            Entity: The parent entity of the specified type, or None if not found.
 519        """
 520        logger.trace(self.__class__.__name__)
 521        if not hasattr(self, "parent") or not self.parent:
 522            return
 523        if self.parent.__class__.__name__ == parent_type:
 524            return self.parent
 525        return self.parent.get_parent(parent_type)
 526
 527    @cached_property
 528    def stanzas(self):
 529        """
 530        Get the list of stanza entities.
 531
 532        Returns:
 533            StanzaList: A list of stanza entities.
 534        """
 535        from .texts import StanzaList
 536
 537        if self.is_text:
 538            o = self.children
 539        elif self.is_stanza:
 540            o = [self]
 541        else:
 542            o = []
 543        return StanzaList(o)
 544
 545    @property
 546    def line_r(self):
 547        """
 548        Get a random line entity.
 549
 550        Returns:
 551            Line: A random line entity, or None if no lines exist.
 552        """
 553        return random.choice(self.lines) if self.lines else None
 554
 555    @property
 556    def word_r(self):
 557        """
 558        Get a random word entity.
 559
 560        Returns:
 561            WordToken: A random word entity, or None if no words exist.
 562        """
 563        return random.choice(self.words) if self.words else None
 564
 565    @cached_property
 566    def lines(self):
 567        """
 568        Get the list of line entities.
 569
 570        Returns:
 571            LineList: A list of line entities.
 572        """
 573        from .texts import LineList
 574
 575        if self.is_stanza:
 576            o = self.children
 577        elif self.is_line:
 578            o = [self]
 579        else:
 580            o = [line for stanza in self.stanzas for line in stanza.children]
 581        return LineList(o)
 582
 583    @cached_property
 584    def wordtokens(self):
 585        """
 586        Get the list of word token entities.
 587
 588        Returns:
 589            WordTokenList: A list of word token entities.
 590        """
 591        from .words import WordTokenList
 592
 593        if self.is_line:
 594            o = self.children
 595        elif self.is_wordtoken:
 596            o = [self]
 597        else:
 598            o = [wt for line in self.lines for wt in line.children]
 599        return WordTokenList(o)
 600
 601    @property
 602    def words(self):
 603        """
 604        Get the list of word token entities.
 605
 606        Returns:
 607            WordTokenList: A list of word token entities.
 608        """
 609        return self.wordtokens
 610
 611    @cached_property
 612    def wordtypes(self):
 613        """
 614        Get the list of word type entities.
 615
 616        Returns:
 617            WordTypeList: A list of word type entities.
 618        """
 619        from .words import WordTypeList
 620
 621        if self.is_wordtoken:
 622            o = self.children
 623        elif self.is_wordtype:
 624            o = [self]
 625        else:
 626            o = [wtype for token in self.wordtokens for wtype in token.children]
 627        return WordTypeList(o)
 628
 629    @cached_property
 630    def wordforms(self):
 631        """
 632        Get the list of word form entities.
 633
 634        Returns:
 635            WordFormList: A list of word form entities.
 636        """
 637        from .words import WordFormList
 638
 639        if self.is_wordtype:
 640            o = self.children[:1]
 641        elif self.is_wordtype:
 642            o = [self]
 643        else:
 644            o = [wtype.children[0] for wtype in self.wordtypes if wtype.children]
 645        return WordFormList(o)
 646
 647    @cached_property
 648    def wordforms_nopunc(self):
 649        """
 650        Get the list of word form entities, excluding punctuation.
 651
 652        Returns:
 653            list: A list of word form entities, excluding punctuation.
 654        """
 655        return [wf for wf in self.wordforms if not wf.parent.is_punc]
 656
    @cached_property
    def wordforms_all(self):
        """
        Get all word forms, grouped per word type.

        Returns:
            list: ``[self]`` for a WordForm; otherwise one entry per word
            type in ``self.wordtypes``, each entry being that word type's
            ``children``.
        """
        # NOTE(review): the assignment below is always overwritten, because
        # the next statement is a separate ``if``/``else`` and a WordType is
        # not a WordForm. A WordType therefore returns ``[self.children]``
        # (via the else branch) rather than ``self.children``. Changing this
        # to ``elif`` would alter the return shape for WordType callers, so
        # confirm the intended shape before touching it.
        if self.is_wordtype:
            o = self.children
        if self.is_wordform:
            o = [self]
        else:
            o = [wtype.children for wtype in self.wordtypes]
        return o
 672
 673    @cached_property
 674    def syllables(self):
 675        """
 676        Get the list of syllable entities.
 677
 678        Returns:
 679            SyllableList: A list of syllable entities.
 680        """
 681        from .words import SyllableList
 682
 683        if self.is_wordform:
 684            o = self.children
 685        if self.is_syll:
 686            o = [self]
 687        else:
 688            o = [syll for wf in self.wordforms for syll in wf.children]
 689        return SyllableList(o)
 690
 691    @cached_property
 692    def phonemes(self):
 693        """
 694        Get the list of phoneme entities.
 695
 696        Returns:
 697            PhonemeList: A list of phoneme entities.
 698        """
 699        from .words import PhonemeList
 700
 701        if self.is_syll:
 702            o = self.children
 703        if self.is_phon:
 704            o = [self]
 705        else:
 706            o = [phon for syll in self.syllables for phon in syll.children]
 707        return PhonemeList(o)
 708
 709    @cached_property
 710    def text(self):
 711        """
 712        Get the parent text entity.
 713
 714        Returns:
 715            Text: The parent text entity, or None if not found.
 716        """
 717        return self.get_parent("Text")
 718
 719    @cached_property
 720    def stanza(self):
 721        """
 722        Get the parent stanza entity.
 723
 724        Returns:
 725            Stanza: The parent stanza entity, or None if not found.
 726        """
 727        return self.get_parent("Stanza")
 728
 729    @cached_property
 730    def line(self):
 731        """
 732        Get the parent line entity.
 733
 734        Returns:
 735            Line: The parent line entity, or None if not found.
 736        """
 737        return self.get_parent("Line")
 738
 739    @cached_property
 740    def wordtoken(self):
 741        """
 742        Get the parent word token entity.
 743
 744        Returns:
 745            WordToken: The parent word token entity, or None if not found.
 746        """
 747        return self.get_parent("WordToken")
 748
 749    @cached_property
 750    def wordtype(self):
 751        """
 752        Get the parent word type entity.
 753
 754        Returns:
 755            WordType: The parent word type entity, or None if not found.
 756        """
 757        return self.get_parent("WordType")
 758
 759    @cached_property
 760    def wordform(self):
 761        """
 762        Get the parent word form entity.
 763
 764        Returns:
 765            WordForm: The parent word form entity, or None if not found.
 766        """
 767        return self.get_parent("WordForm")
 768
 769    @cached_property
 770    def syllable(self):
 771        """
 772        Get the parent syllable entity.
 773
 774        Returns:
 775            Syllable: The parent syllable entity, or None if not found.
 776        """
 777        return self.get_parent("Syllable")
 778
 779    @cached_property
 780    def i(self):
 781        """
 782        Get the index of the entity in its parent's children list.
 783
 784        Returns:
 785            int: The index of the entity, or None if not found.
 786        """
 787        if self.parent is None:
 788            return None
 789        if not self.parent.children:
 790            return None
 791        try:
 792            return self.parent.children.index(self)
 793        except IndexError:
 794            return None
 795
 796    @cached_property
 797    def num(self):
 798        """
 799        Get the 1-based index of the entity in its parent's children list.
 800
 801        Returns:
 802            int: The 1-based index of the entity, or None if not found.
 803        """
 804        return self.i + 1 if self.i is not None else None
 805
 806    @cached_property
 807    def next(self):
 808        """
 809        Get the next sibling entity.
 810
 811        Returns:
 812            Entity: The next sibling entity, or None if not found.
 813        """
 814        if self.i is None:
 815            return None
 816        try:
 817            return self.parent.children[self.i + 1]
 818        except IndexError:
 819            return None
 820
 821    @cached_property
 822    def prev(self):
 823        """
 824        Get the previous sibling entity.
 825
 826        Returns:
 827            Entity: The previous sibling entity, or None if not found.
 828        """
 829        if self.i is None:
 830            return None
 831        i = self.i
 832        if i - 1 < 0:
 833            return None
 834        try:
 835            return self.parent.children[i - 1]
 836        except IndexError:
 837            return None
 838
 839    @cached_property
 840    def is_text(self):
 841        """
 842        Check if the entity is a text entity.
 843
 844        Returns:
 845            bool: True if the entity is a text entity, False otherwise.
 846        """
 847        return self.__class__.__name__ == "Text"
 848
 849    @cached_property
 850    def is_stanza(self):
 851        """
 852        Check if the entity is a stanza entity.
 853
 854        Returns:
 855            bool: True if the entity is a stanza entity, False otherwise.
 856        """
 857        return self.__class__.__name__ == "Stanza"
 858
 859    @cached_property
 860    def is_line(self):
 861        """
 862        Check if the entity is a line entity.
 863
 864        Returns:
 865            bool: True if the entity is a line entity, False otherwise.
 866        """
 867        return self.__class__.__name__ == "Line"
 868
 869    @cached_property
 870    def is_wordtoken(self):
 871        """
 872        Check if the entity is a word token entity.
 873
 874        Returns:
 875            bool: True if the entity is a word token entity, False otherwise.
 876        """
 877        return self.__class__.__name__ == "WordToken"
 878
 879    @cached_property
 880    def is_wordtype(self):
 881        """
 882        Check if the entity is a word type entity.
 883
 884        Returns:
 885            bool: True if the entity is a word type entity, False otherwise.
 886        """
 887        return self.__class__.__name__ == "WordType"
 888
 889    @cached_property
 890    def is_wordform(self):
 891        """
 892        Check if the entity is a word form entity.
 893
 894        Returns:
 895            bool: True if the entity is a word form entity, False otherwise.
 896        """
 897        return self.__class__.__name__ == "WordForm"
 898
 899    @cached_property
 900    def is_syll(self):
 901        """
 902        Check if the entity is a syllable entity.
 903
 904        Returns:
 905            bool: True if the entity is a syllable entity, False otherwise.
 906        """
 907        return self.__class__.__name__ == "Syllable"
 908
 909    @cached_property
 910    def is_phon(self):
 911        """
 912        Check if the entity is a phoneme entity.
 913
 914        Returns:
 915            bool: True if the entity is a phoneme entity, False otherwise.
 916        """
 917        return self.__class__.__name__ == "PhonemeClass"
 918
 919    def children_from_cache(self):
 920        """
 921        Get the children of the entity from the cache.
 922
 923        Returns:
 924            list: The list of child entities, or None if not found in the cache.
 925        """
 926        if caching_is_enabled():
 927            res = self.from_cache()
 928            print("FOUND", res)
 929            return None if res is None else res.children
 930
 931    def get_key(self, key):
 932        """
 933        Get a key for caching purposes.
 934
 935        Args:
 936            key: The key object.
 937
 938        Returns:
 939            str: The hashed key.
 940        """
 941        if hasattr(key, "to_hash"):
 942            key = key.to_hash()
 943        elif key:
 944            key = hashstr(key)
 945        return key
 946
 947    def from_cache(self, obj=None, key=None, as_dict=False):
 948        """
 949        Get an object from the cache.
 950
 951        Args:
 952            obj: The object to cache.
 953            key: The key for the cache.
 954            as_dict (bool): If True, return the cached data as a dictionary.
 955
 956        Returns:
 957            Any: The cached object, or None if not found.
 958        """
 959        if obj is None:
 960            obj = self
 961        key = self.get_key(obj) if not key else key
 962        if key and self.use_cache != False:
 963            cache = self.get_cache()
 964            if key in cache:
 965                dat = cache[key]
 966                if dat:
 967                    return from_json(dat) if not as_dict else dat
 968
    def get_cache(self):
        """
        Get the cache object.

        Returns:
            SimpleCache: A ``SimpleCache`` instance, constructed anew on every
            call.
        """
        return SimpleCache()
 977
 978    def cache(
 979        self, key_obj=None, val_obj=None, key=None, force=False
 980    ):
 981        """
 982        Cache an object.
 983
 984        Args:
 985            key_obj: The object to use as the cache key.
 986            val_obj: The object to cache.
 987            key: An optional key for the cache.
 988            force (bool): If True, force the cache to be updated.
 989        """
 990        if key_obj is None:
 991            key_obj = self
 992        if val_obj is None:
 993            val_obj = key_obj
 994        logger.trace(f"key_obj = {key_obj}")
 995        logger.trace(f"val_obj = {val_obj}")
 996        key = self.get_key(key_obj) if not key else key
 997        cache = self.get_cache()
 998        if key and (force or not key in cache):
 999            with logmap(f"saving object under key {key[:8]}"):
1000                with logmap("exporting to json", level="trace"):
1001                    data = val_obj.to_json()
1002                with logmap("uploading json to cache", level="trace"):
1003                    cache[key] = data
1004
1005
class EntityList(Entity):
    """
    A list of Entity objects.

    Unlike ``Entity.__init__``, the constructor stores the given children
    as-is: they are not type-checked and their ``parent`` is left untouched.
    """

    def __init__(self, children=None, parent=None, **kwargs):
        """
        Initialize an EntityList object.

        Args:
            children (iterable, optional): Child entities; copied into a new
                list. Defaults to no children (None replaces the former
                mutable ``[]`` default).
            parent (Entity): The parent entity.
            **kwargs: Additional attributes; kept in ``_attrs`` and also set
                as attributes on the instance.
        """
        self.parent = parent
        self.children = [] if children is None else list(children)
        self._attrs = kwargs
        self._txt = None
        for k, v in self._attrs.items():
            setattr(self, k, v)

    @cached_property
    def txt(self):
        """
        Get the text content of the entity list.

        Returns:
            None: Always returns None for EntityList objects.
        """
        return None
class Entity(collections.UserList):
   6class Entity(UserList):
   7    """
   8    Root Entity class representing a hierarchical structure in prosodic analysis.
   9
  10    This class serves as the base for various prosodic entities such as texts, stanzas,
  11    lines, words, syllables, and phonemes. It provides common functionality for
  12    managing hierarchical relationships, attributes, and data representation.
  13
  14    Attributes:
  15        child_type (str): The type of child entities this entity can contain.
  16        is_parseable (bool): Whether this entity can be parsed.
  17        index_name (str): The name used for indexing this entity type.
  18        prefix (str): A prefix used for attribute naming.
  19        list_type (type): The type of list used for storing children.
  20        cached_properties_to_clear (list): Properties to clear from cache.
  21        use_cache (bool): Whether to use caching for this entity.
  22        sep (str): Separator used when joining child texts.
  23    """
  24
  25    child_type = "Text"
  26    is_parseable = False
  27    index_name = None
  28    prefix = "ent"
  29    list_type = None
  30    cached_properties_to_clear = []
  31    use_cache = False
  32    sep = ""
  33
  34    def __init__(self, txt: str = "", children=[], parent=None, **kwargs):
  35        """
  36        Initialize an Entity object.
  37
  38        Args:
  39            txt (str): The text content of the entity.
  40            children (list): List of child entities.
  41            parent (Entity): The parent entity.
  42            **kwargs: Additional attributes to set on the entity.
  43        """
  44        self.parent = parent
  45        newchildren = []
  46        for child in children:
  47            if not isinstance(child, Entity):
  48                logger.warning(f"{child} is not an Entity")
  49                continue
  50            newchildren.append(child)
  51            # if not child.is_wordtype:   # don't do this for wordtypes since each wordtype is a single/shared python object
  52            child.parent = self
  53        children = newchildren
  54        if self.list_type is None:
  55            self.list_type = 'EntityList'
  56        from .imports import GLOBALS
  57        self.children = GLOBALS[self.list_type](children)
  58        self._attrs = kwargs
  59        self._txt = txt
  60        self._mtr = None
  61        for k, v in self._attrs.items():
  62            setattr(self, k, v)
  63
  64    def __iter__(self):
  65        """
  66        Iterate over the children of this entity.
  67
  68        Yields:
  69            Entity: The next child entity.
  70        """
  71        yield from self.children
  72
  73    def to_hash(self):
  74        """
  75        Generate a hash representation of the entity.
  76
  77        Returns:
  78            str: A hash string representing the entity's content and attributes.
  79        """
  80        return hashstr(
  81            self.txt, tuple(sorted(self._attrs.items())), self.__class__.__name__
  82        )
  83
  84    @cached_property
  85    def html(self):
  86        """
  87        Get the HTML representation of the entity.
  88
  89        Returns:
  90            str: HTML representation of the entity, if available.
  91        """
  92        if hasattr(self, "to_html"):
  93            return self.to_html()
  94
  95    @cached_property
  96    def key(self):
  97        """
  98        Generate a unique key for the entity.
  99
 100        Returns:
 101            str: A string key representing the entity's class and attributes.
 102        """
 103        attrs = {
 104            **{k: v for k, v in self.attrs.items() if v is not None},
 105            "txt": self._txt,
 106        }
 107        return f"{self.__class__.__name__}({get_attr_str(attrs)})"
 108
 109    @cached_property
 110    def hash(self):
 111        """
 112        Get a hash value for the entity.
 113
 114        Returns:
 115            str: A hash string for the entity.
 116        """
 117        return hashstr(self.key)
 118
 119    def __hash__(self):
 120        """
 121        Get the hash value for use in hash-based collections.
 122
 123        Returns:
 124            int: The hash value of the entity.
 125        """
 126        return hash(self.hash)
 127
 128    def __eq__(self, other):
 129        """
 130        Check if this entity is equal to another.
 131
 132        Args:
 133            other: The other object to compare with.
 134
 135        Returns:
 136            bool: True if the objects are the same instance, False otherwise.
 137        """
 138        return self is other
 139
 140    def __bool__(self):
 141        """
 142        Check if the entity is considered True in a boolean context.
 143
 144        Returns:
 145            bool: Always returns True for Entity objects.
 146        """
 147        return True
 148
 149    def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
 150        """
 151        Convert the entity to a JSON representation.
 152
 153        Args:
 154            fn (str, optional): Filename to save the JSON output.
 155            no_txt (bool): If True, exclude the text content.
 156            yes_txt (bool): If True, include the full text content.
 157            **kwargs: Additional key-value pairs to include in the JSON.
 158
 159        Returns:
 160            dict: A dictionary representation of the entity.
 161        """
 162        txt = (self._txt if not yes_txt else self.txt) if not no_txt else None
 163        return to_json(
 164            {
 165                "_class": self.__class__.__name__,
 166                **({"txt": txt} if txt is not None and (yes_txt or txt) else {}),
 167                "children": [kid.to_json() for kid in self.children],
 168                **kwargs,
 169            },
 170            fn=fn,
 171        )
 172
 173    def save(self, fn, **kwargs):
 174        """
 175        Save the entity to a file in JSON format.
 176
 177        Args:
 178            fn (str): The filename to save to.
 179            **kwargs: Additional arguments to pass to to_json.
 180
 181        Returns:
 182            The result of to_json with the given filename.
 183        """
 184        return self.to_json(fn=fn, **kwargs)
 185
 186    def render(self, as_str=False):
 187        """
 188        Render the entity as HTML.
 189
 190        Args:
 191            as_str (bool): If True, return the result as a string.
 192
 193        Returns:
 194            str or HTML: The rendered HTML representation of the entity.
 195        """
 196        return self.to_html(as_str=as_str)
 197
 198    @staticmethod
 199    def from_json(json_d):
 200        """
 201        Create an Entity object from a JSON dictionary.
 202
 203        Args:
 204            json_d (dict): A dictionary containing the entity data.
 205
 206        Returns:
 207            Entity: An instance of the appropriate Entity subclass.
 208        """
 209        from .imports import GLOBALS, CHILDCLASSES
 210
 211        classname = json_d["_class"]
 212        classx = GLOBALS[classname]
 213        childx = CHILDCLASSES.get(classname)
 214        children = json_d.get("children", [])
 215        inpd = {k: v for k, v in json_d.items() if k not in {"children", "_class"}}
 216        if children and childx:
 217            children = [childx.from_json(d) for d in json_d["children"]]
 218        return classx(children=tuple(children), **inpd)
 219
 220    @property
 221    def attrs(self):
 222        """
 223        Get the attributes of the entity.
 224
 225        Returns:
 226            dict: A dictionary of the entity's attributes.
 227        """
 228        odx = {"num": self.num}
 229        if (
 230            self.__class__.__name__
 231            not in {"Text", "Stanza", "MeterLine", "MeterText", "Meter"}
 232            and self.txt
 233        ):
 234            odx["txt"] = self.txt
 235        return {**odx, **self._attrs}
 236
 237    @cached_property
 238    def prefix_attrs(self, with_parent=True):
 239        """
 240        Get the attributes of the entity with a prefix.
 241
 242        Args:
 243            with_parent (bool): If True, include parent attributes.
 244
 245        Returns:
 246            dict: A dictionary of the entity's attributes with a prefix.
 247        """
 248
 249        def getkey(k):
 250            o = f"{self.prefix}_{k}"
 251            o = DF_COLS_RENAME.get(o, o)
 252            return o
 253
 254        odx = {getkey(k): v for k, v in self.attrs.items() if v is not None}
 255        if with_parent and self.parent:
 256            return {**self.parent.prefix_attrs, **odx}
 257        return odx
 258
 259    @cached_property
 260    def txt(self):
 261        """
 262        Get the text content of the entity.
 263
 264        Returns:
 265            str: The text content of the entity.
 266        """
 267        if self._txt:
 268            txt = self._txt
 269        elif self.children:
 270            txt = self.child_class.sep.join(child.txt for child in self.children)
 271        else:
 272            txt = ""
 273        return clean_text(txt)
 274
 275    @cached_property
 276    def data(self):
 277        """
 278        Get the data associated with the entity.
 279
 280        Returns:
 281            list: The list of child entities.
 282        """
 283        return self.children
 284
 285    @cached_property
 286    def l(self):
 287        """
 288        Get the list of child entities.
 289
 290        Returns:
 291            list: The list of child entities.
 292        """
 293        return self.children
 294
 295    def clear_cached_properties(self):
 296        """
 297        Clear cached properties to free up memory.
 298        """
 299        for prop in self.cached_properties_to_clear:
 300            if prop in self.__dict__:
 301                del self.__dict__[prop]
 302            # elif hasattr(self,prop):
 303            #     try:
 304            #         func = getattr(self,prop)
 305            #         func.clear_cache()
 306            #     except AttributeError:
 307            #         pass
 308
 309    def inspect(self, indent=0, maxlines=None, incl_phons=False):
 310        """
 311        Inspect the entity and its children.
 312
 313        Args:
 314            indent (int): The indentation level for the output.
 315            maxlines (int): The maximum number of lines to display.
 316            incl_phons (bool): If True, include phoneme information.
 317        """
 318        attrstr = get_attr_str(self.attrs)
 319        myself = f"{self.__class__.__name__}({attrstr})"
 320        if indent:
 321            myself = textwrap.indent(myself, "|" + (" " * (indent - 1)))
 322        lines = [myself]
 323        for child in self.children:
 324            if isinstance(child, Entity) and (
 325                incl_phons or not child.__class__.__name__.startswith("Phoneme")
 326            ):
 327                lines.append(
 328                    child.inspect(indent=indent + 4, incl_phons=incl_phons).replace(
 329                        "PhonemeClass", "Phoneme"
 330                    )
 331                )
 332        # self.__class__.__name__ in {'Text', 'Stanza', 'Line'}
 333        dblbreakfor = False
 334        breakstr = "\n|\n" if dblbreakfor else "\n"
 335        o = breakstr.join(lines)
 336        if not indent:
 337            if maxlines:
 338                o = "\n".join(o.split("\n")[:maxlines])
 339            print(o)
 340        else:
 341            return o
 342
 343    def _repr_html_(self, df=None):
 344        """
 345        Get the HTML representation of the entity.
 346
 347        Args:
 348            df (DataFrame): An optional DataFrame to use for rendering.
 349
 350        Returns:
 351            str: The HTML representation of the entity.
 352        """
 353
 354        def blank(x):
 355            if x in {None, np.nan}:
 356                return ""
 357            return x
 358
 359        return (self.df if df is None else df).applymap(blank)._repr_html_()
 360
 361    def __repr__(self, attrs=None, bad_keys=None):
 362        """
 363        Get a string representation of the entity.
 364
 365        Args:
 366            attrs (dict): An optional dictionary of attributes to use.
 367            bad_keys (list): An optional list of keys to exclude.
 368
 369        Returns:
 370            str: A string representation of the entity.
 371        """
 372        d = {
 373            k: v
 374            for k, v in (
 375                attrs
 376                if attrs is not None
 377                else (self.attrs if self.attrs is not None else self._attrs)
 378            ).items()
 379        }
 380        return f"{self.__class__.__name__}({get_attr_str(d, bad_keys=bad_keys)})"
 381
 382    @cached_property
 383    def ld(self):
 384        """
 385        Get a list of dictionaries representing the entity and its children.
 386
 387        Returns:
 388            list: A list of dictionaries representing the entity and its children.
 389        """
 390        return self.get_ld()
 391
 392    @cached_property
 393    def child_class(self):
 394        """
 395        Get the class of the child entities.
 396
 397        Returns:
 398            type: The class of the child entities.
 399        """
 400        from .imports import GLOBALS
 401
 402        return GLOBALS.get(self.child_type)
 403
 404    def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
 405        """
 406        Get a list of dictionaries representing the entity and its children.
 407
 408        Args:
 409            incl_phons (bool): If True, include phoneme information.
 410            incl_sylls (bool): If True, include syllable information.
 411            multiple_wordforms (bool): If True, include multiple word forms.
 412
 413        Returns:
 414            list: A list of dictionaries representing the entity and its children.
 415        """
 416        if not incl_sylls and self.child_type == "Syllable":
 417            return [{**self.prefix_attrs}]
 418        if not incl_phons and self.child_type == "Phoneme":
 419            return [{**self.prefix_attrs}]
 420        good_children = [c for c in self.children if isinstance(c, Entity)]
 421        # logger.debug(f'good children of {type(self)} -> {good_children}')
 422        if not multiple_wordforms and self.child_type == "WordForm" and good_children:
 423            good_children = good_children[:1]
 424            # logger.debug(f'good children now {good_children}')
 425        if good_children:
 426            return [
 427                {**self.prefix_attrs, **child.prefix_attrs, **grandchild_d}
 428                for child in good_children
 429                for grandchild_d in child.get_ld(
 430                    incl_phons=incl_phons,
 431                    incl_sylls=incl_sylls,
 432                    multiple_wordforms=multiple_wordforms,
 433                )
 434            ]
 435        else:
 436            return [{**self.prefix_attrs}]
 437
 438    def get_df(self, **kwargs):
 439        """
 440        Get a DataFrame representation of the entity and its children.
 441
 442        Args:
 443            **kwargs: Additional arguments to pass to get_ld.
 444
 445        Returns:
 446            DataFrame: A DataFrame representation of the entity and its children.
 447        """
 448        odf = pd.DataFrame(self.get_ld(**kwargs))
 449        for c in DF_BADCOLS:
 450            if c in set(odf.columns):
 451                odf = odf.drop(c, axis=1)
 452        for c in odf:
 453            if c.endswith("_num"):
 454                odf[c] = odf[c].fillna(0).apply(int)
 455            else:
 456                odf[c] = odf[c].fillna("")
 457        odf = setindex(odf, DF_INDEX)
 458
 459        def unbool(x):
 460            if x is True:
 461                return 1
 462            if x is False:
 463                return 0
 464            if x is None:
 465                return 0
 466            return x
 467
 468        odf = odf.applymap(unbool)
 469        return odf
 470
 471    @cached_property
 472    def df(self):
 473        """
 474        Get a DataFrame representation of the entity and its children.
 475
 476        Returns:
 477            DataFrame: A DataFrame representation of the entity and its children.
 478        """
 479        return self.get_df()
 480
 481    def __getattr__(self, attr):
 482        """
 483        Get an attribute of the entity by name.
 484
 485        Args:
 486            attr (str): The name of the attribute.
 487
 488        Returns:
 489            Any: The value of the attribute.
 490        """
 491        objs = {
 492            "stanza": "stanzas",
 493            "line": "lines",
 494            "word": "wordtokens",
 495            "wordtoken": "wordtokens",
 496            "wordtype": "wordtypes",
 497            "wordform": "wordforms",
 498            "syllable": "syllables",
 499            "phoneme": "phonemes",
 500        }
 501        if attr[-1].isdigit():
 502            for pref, lname in objs.items():
 503                if attr.startswith(pref) and attr[len(pref) :].isdigit():
 504                    num = int(attr[len(pref) :])
 505                    try:
 506                        return getattr(self, lname)[num - 1]
 507                    except IndexError:
 508                        logger.warning(f"no {pref} at that number")
 509                        return
 510
 511    def get_parent(self, parent_type=None):
 512        """
 513        Get the parent entity of a specific type.
 514
 515        Args:
 516            parent_type (str): The type of parent entity to find.
 517
 518        Returns:
 519            Entity: The parent entity of the specified type, or None if not found.
 520        """
 521        logger.trace(self.__class__.__name__)
 522        if not hasattr(self, "parent") or not self.parent:
 523            return
 524        if self.parent.__class__.__name__ == parent_type:
 525            return self.parent
 526        return self.parent.get_parent(parent_type)
 527
 528    @cached_property
 529    def stanzas(self):
 530        """
 531        Get the list of stanza entities.
 532
 533        Returns:
 534            StanzaList: A list of stanza entities.
 535        """
 536        from .texts import StanzaList
 537
 538        if self.is_text:
 539            o = self.children
 540        elif self.is_stanza:
 541            o = [self]
 542        else:
 543            o = []
 544        return StanzaList(o)
 545
 546    @property
 547    def line_r(self):
 548        """
 549        Get a random line entity.
 550
 551        Returns:
 552            Line: A random line entity, or None if no lines exist.
 553        """
 554        return random.choice(self.lines) if self.lines else None
 555
 556    @property
 557    def word_r(self):
 558        """
 559        Get a random word entity.
 560
 561        Returns:
 562            WordToken: A random word entity, or None if no words exist.
 563        """
 564        return random.choice(self.words) if self.words else None
 565
 566    @cached_property
 567    def lines(self):
 568        """
 569        Get the list of line entities.
 570
 571        Returns:
 572            LineList: A list of line entities.
 573        """
 574        from .texts import LineList
 575
 576        if self.is_stanza:
 577            o = self.children
 578        elif self.is_line:
 579            o = [self]
 580        else:
 581            o = [line for stanza in self.stanzas for line in stanza.children]
 582        return LineList(o)
 583
 584    @cached_property
 585    def wordtokens(self):
 586        """
 587        Get the list of word token entities.
 588
 589        Returns:
 590            WordTokenList: A list of word token entities.
 591        """
 592        from .words import WordTokenList
 593
 594        if self.is_line:
 595            o = self.children
 596        elif self.is_wordtoken:
 597            o = [self]
 598        else:
 599            o = [wt for line in self.lines for wt in line.children]
 600        return WordTokenList(o)
 601
 602    @property
 603    def words(self):
 604        """
 605        Get the list of word token entities.
 606
 607        Returns:
 608            WordTokenList: A list of word token entities.
 609        """
 610        return self.wordtokens
 611
 612    @cached_property
 613    def wordtypes(self):
 614        """
 615        Get the list of word type entities.
 616
 617        Returns:
 618            WordTypeList: A list of word type entities.
 619        """
 620        from .words import WordTypeList
 621
 622        if self.is_wordtoken:
 623            o = self.children
 624        elif self.is_wordtype:
 625            o = [self]
 626        else:
 627            o = [wtype for token in self.wordtokens for wtype in token.children]
 628        return WordTypeList(o)
 629
 630    @cached_property
 631    def wordforms(self):
 632        """
 633        Get the list of word form entities.
 634
 635        Returns:
 636            WordFormList: A list of word form entities.
 637        """
 638        from .words import WordFormList
 639
 640        if self.is_wordtype:
 641            o = self.children[:1]
 642        elif self.is_wordtype:
 643            o = [self]
 644        else:
 645            o = [wtype.children[0] for wtype in self.wordtypes if wtype.children]
 646        return WordFormList(o)
 647
 648    @cached_property
 649    def wordforms_nopunc(self):
 650        """
 651        Get the list of word form entities, excluding punctuation.
 652
 653        Returns:
 654            list: A list of word form entities, excluding punctuation.
 655        """
 656        return [wf for wf in self.wordforms if not wf.parent.is_punc]
 657
 658    @cached_property
 659    def wordforms_all(self):
 660        """
 661        Get the list of all word form entities.
 662
 663        Returns:
 664            list: A list of all word form entities.
 665        """
 666        if self.is_wordtype:
 667            o = self.children
 668        if self.is_wordform:
 669            o = [self]
 670        else:
 671            o = [wtype.children for wtype in self.wordtypes]
 672        return o
 673
 674    @cached_property
 675    def syllables(self):
 676        """
 677        Get the list of syllable entities.
 678
 679        Returns:
 680            SyllableList: A list of syllable entities.
 681        """
 682        from .words import SyllableList
 683
 684        if self.is_wordform:
 685            o = self.children
 686        if self.is_syll:
 687            o = [self]
 688        else:
 689            o = [syll for wf in self.wordforms for syll in wf.children]
 690        return SyllableList(o)
 691
 692    @cached_property
 693    def phonemes(self):
 694        """
 695        Get the list of phoneme entities.
 696
 697        Returns:
 698            PhonemeList: A list of phoneme entities.
 699        """
 700        from .words import PhonemeList
 701
 702        if self.is_syll:
 703            o = self.children
 704        if self.is_phon:
 705            o = [self]
 706        else:
 707            o = [phon for syll in self.syllables for phon in syll.children]
 708        return PhonemeList(o)
 709
 710    @cached_property
 711    def text(self):
 712        """
 713        Get the parent text entity.
 714
 715        Returns:
 716            Text: The parent text entity, or None if not found.
 717        """
 718        return self.get_parent("Text")
 719
 720    @cached_property
 721    def stanza(self):
 722        """
 723        Get the parent stanza entity.
 724
 725        Returns:
 726            Stanza: The parent stanza entity, or None if not found.
 727        """
 728        return self.get_parent("Stanza")
 729
 730    @cached_property
 731    def line(self):
 732        """
 733        Get the parent line entity.
 734
 735        Returns:
 736            Line: The parent line entity, or None if not found.
 737        """
 738        return self.get_parent("Line")
 739
 740    @cached_property
 741    def wordtoken(self):
 742        """
 743        Get the parent word token entity.
 744
 745        Returns:
 746            WordToken: The parent word token entity, or None if not found.
 747        """
 748        return self.get_parent("WordToken")
 749
 750    @cached_property
 751    def wordtype(self):
 752        """
 753        Get the parent word type entity.
 754
 755        Returns:
 756            WordType: The parent word type entity, or None if not found.
 757        """
 758        return self.get_parent("WordType")
 759
 760    @cached_property
 761    def wordform(self):
 762        """
 763        Get the parent word form entity.
 764
 765        Returns:
 766            WordForm: The parent word form entity, or None if not found.
 767        """
 768        return self.get_parent("WordForm")
 769
 770    @cached_property
 771    def syllable(self):
 772        """
 773        Get the parent syllable entity.
 774
 775        Returns:
 776            Syllable: The parent syllable entity, or None if not found.
 777        """
 778        return self.get_parent("Syllable")
 779
 780    @cached_property
 781    def i(self):
 782        """
 783        Get the index of the entity in its parent's children list.
 784
 785        Returns:
 786            int: The index of the entity, or None if not found.
 787        """
 788        if self.parent is None:
 789            return None
 790        if not self.parent.children:
 791            return None
 792        try:
 793            return self.parent.children.index(self)
 794        except IndexError:
 795            return None
 796
 797    @cached_property
 798    def num(self):
 799        """
 800        Get the 1-based index of the entity in its parent's children list.
 801
 802        Returns:
 803            int: The 1-based index of the entity, or None if not found.
 804        """
 805        return self.i + 1 if self.i is not None else None
 806
 807    @cached_property
 808    def next(self):
 809        """
 810        Get the next sibling entity.
 811
 812        Returns:
 813            Entity: The next sibling entity, or None if not found.
 814        """
 815        if self.i is None:
 816            return None
 817        try:
 818            return self.parent.children[self.i + 1]
 819        except IndexError:
 820            return None
 821
 822    @cached_property
 823    def prev(self):
 824        """
 825        Get the previous sibling entity.
 826
 827        Returns:
 828            Entity: The previous sibling entity, or None if not found.
 829        """
 830        if self.i is None:
 831            return None
 832        i = self.i
 833        if i - 1 < 0:
 834            return None
 835        try:
 836            return self.parent.children[i - 1]
 837        except IndexError:
 838            return None
 839
 840    @cached_property
 841    def is_text(self):
 842        """
 843        Check if the entity is a text entity.
 844
 845        Returns:
 846            bool: True if the entity is a text entity, False otherwise.
 847        """
 848        return self.__class__.__name__ == "Text"
 849
 850    @cached_property
 851    def is_stanza(self):
 852        """
 853        Check if the entity is a stanza entity.
 854
 855        Returns:
 856            bool: True if the entity is a stanza entity, False otherwise.
 857        """
 858        return self.__class__.__name__ == "Stanza"
 859
 860    @cached_property
 861    def is_line(self):
 862        """
 863        Check if the entity is a line entity.
 864
 865        Returns:
 866            bool: True if the entity is a line entity, False otherwise.
 867        """
 868        return self.__class__.__name__ == "Line"
 869
 870    @cached_property
 871    def is_wordtoken(self):
 872        """
 873        Check if the entity is a word token entity.
 874
 875        Returns:
 876            bool: True if the entity is a word token entity, False otherwise.
 877        """
 878        return self.__class__.__name__ == "WordToken"
 879
 880    @cached_property
 881    def is_wordtype(self):
 882        """
 883        Check if the entity is a word type entity.
 884
 885        Returns:
 886            bool: True if the entity is a word type entity, False otherwise.
 887        """
 888        return self.__class__.__name__ == "WordType"
 889
 890    @cached_property
 891    def is_wordform(self):
 892        """
 893        Check if the entity is a word form entity.
 894
 895        Returns:
 896            bool: True if the entity is a word form entity, False otherwise.
 897        """
 898        return self.__class__.__name__ == "WordForm"
 899
 900    @cached_property
 901    def is_syll(self):
 902        """
 903        Check if the entity is a syllable entity.
 904
 905        Returns:
 906            bool: True if the entity is a syllable entity, False otherwise.
 907        """
 908        return self.__class__.__name__ == "Syllable"
 909
 910    @cached_property
 911    def is_phon(self):
 912        """
 913        Check if the entity is a phoneme entity.
 914
 915        Returns:
 916            bool: True if the entity is a phoneme entity, False otherwise.
 917        """
 918        return self.__class__.__name__ == "PhonemeClass"
 919
 920    def children_from_cache(self):
 921        """
 922        Get the children of the entity from the cache.
 923
 924        Returns:
 925            list: The list of child entities, or None if not found in the cache.
 926        """
 927        if caching_is_enabled():
 928            res = self.from_cache()
 929            print("FOUND", res)
 930            return None if res is None else res.children
 931
 932    def get_key(self, key):
 933        """
 934        Get a key for caching purposes.
 935
 936        Args:
 937            key: The key object.
 938
 939        Returns:
 940            str: The hashed key.
 941        """
 942        if hasattr(key, "to_hash"):
 943            key = key.to_hash()
 944        elif key:
 945            key = hashstr(key)
 946        return key
 947
 948    def from_cache(self, obj=None, key=None, as_dict=False):
 949        """
 950        Get an object from the cache.
 951
 952        Args:
 953            obj: The object to cache.
 954            key: The key for the cache.
 955            as_dict (bool): If True, return the cached data as a dictionary.
 956
 957        Returns:
 958            Any: The cached object, or None if not found.
 959        """
 960        if obj is None:
 961            obj = self
 962        key = self.get_key(obj) if not key else key
 963        if key and self.use_cache != False:
 964            cache = self.get_cache()
 965            if key in cache:
 966                dat = cache[key]
 967                if dat:
 968                    return from_json(dat) if not as_dict else dat
 969
 970    def get_cache(self):
 971        """
 972        Get the cache object.
 973
 974        Returns:
 975            SimpleCache: The cache object.
 976        """
 977        return SimpleCache()
 978
 979    def cache(
 980        self, key_obj=None, val_obj=None, key=None, force=False
 981    ):
 982        """
 983        Cache an object.
 984
 985        Args:
 986            key_obj: The object to use as the cache key.
 987            val_obj: The object to cache.
 988            key: An optional key for the cache.
 989            force (bool): If True, force the cache to be updated.
 990        """
 991        if key_obj is None:
 992            key_obj = self
 993        if val_obj is None:
 994            val_obj = key_obj
 995        logger.trace(f"key_obj = {key_obj}")
 996        logger.trace(f"val_obj = {val_obj}")
 997        key = self.get_key(key_obj) if not key else key
 998        cache = self.get_cache()
 999        if key and (force or not key in cache):
1000            with logmap(f"saving object under key {key[:8]}"):
1001                with logmap("exporting to json", level="trace"):
1002                    data = val_obj.to_json()
1003                with logmap("uploading json to cache", level="trace"):
1004                    cache[key] = data

Root Entity class representing a hierarchical structure in prosodic analysis.

This class serves as the base for various prosodic entities such as texts, stanzas, lines, words, syllables, and phonemes. It provides common functionality for managing hierarchical relationships, attributes, and data representation.

Attributes:
  • child_type (str): The type of child entities this entity can contain.
  • is_parseable (bool): Whether this entity can be parsed.
  • index_name (str): The name used for indexing this entity type.
  • prefix (str): A prefix used for attribute naming.
  • list_type (type): The type of list used for storing children.
  • cached_properties_to_clear (list): Properties to clear from cache.
  • use_cache (bool): Whether to use caching for this entity.
  • sep (str): Separator used when joining child texts.
Entity(txt: str = '', children=[], parent=None, **kwargs)
34    def __init__(self, txt: str = "", children=[], parent=None, **kwargs):
35        """
36        Initialize an Entity object.
37
38        Args:
39            txt (str): The text content of the entity.
40            children (list): List of child entities.
41            parent (Entity): The parent entity.
42            **kwargs: Additional attributes to set on the entity.
43        """
44        self.parent = parent
45        newchildren = []
46        for child in children:
47            if not isinstance(child, Entity):
48                logger.warning(f"{child} is not an Entity")
49                continue
50            newchildren.append(child)
51            # if not child.is_wordtype:   # don't do this for wordtypes since each wordtype is a single/shared python object
52            child.parent = self
53        children = newchildren
54        if self.list_type is None:
55            self.list_type = 'EntityList'
56        from .imports import GLOBALS
57        self.children = GLOBALS[self.list_type](children)
58        self._attrs = kwargs
59        self._txt = txt
60        self._mtr = None
61        for k, v in self._attrs.items():
62            setattr(self, k, v)

Initialize an Entity object.

Arguments:
  • txt (str): The text content of the entity.
  • children (list): List of child entities.
  • parent (Entity): The parent entity.
  • **kwargs: Additional attributes to set on the entity.
child_type = 'Text'
is_parseable = False
index_name = None
prefix = 'ent'
list_type = None
cached_properties_to_clear = []
use_cache = False
sep = ''
parent
children
def to_hash(self):
73    def to_hash(self):
74        """
75        Generate a hash representation of the entity.
76
77        Returns:
78            str: A hash string representing the entity's content and attributes.
79        """
80        return hashstr(
81            self.txt, tuple(sorted(self._attrs.items())), self.__class__.__name__
82        )

Generate a hash representation of the entity.

Returns:

str: A hash string representing the entity's content and attributes.

html
84    @cached_property
85    def html(self):
86        """
87        Get the HTML representation of the entity.
88
89        Returns:
90            str: HTML representation of the entity, if available.
91        """
92        if hasattr(self, "to_html"):
93            return self.to_html()

Get the HTML representation of the entity.

Returns:

str: HTML representation of the entity, if available.

key
 95    @cached_property
 96    def key(self):
 97        """
 98        Generate a unique key for the entity.
 99
100        Returns:
101            str: A string key representing the entity's class and attributes.
102        """
103        attrs = {
104            **{k: v for k, v in self.attrs.items() if v is not None},
105            "txt": self._txt,
106        }
107        return f"{self.__class__.__name__}({get_attr_str(attrs)})"

Generate a unique key for the entity.

Returns:

str: A string key representing the entity's class and attributes.

hash
109    @cached_property
110    def hash(self):
111        """
112        Get a hash value for the entity.
113
114        Returns:
115            str: A hash string for the entity.
116        """
117        return hashstr(self.key)

Get a hash value for the entity.

Returns:

str: A hash string for the entity.

def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
149    def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
150        """
151        Convert the entity to a JSON representation.
152
153        Args:
154            fn (str, optional): Filename to save the JSON output.
155            no_txt (bool): If True, exclude the text content.
156            yes_txt (bool): If True, include the full text content.
157            **kwargs: Additional key-value pairs to include in the JSON.
158
159        Returns:
160            dict: A dictionary representation of the entity.
161        """
162        txt = (self._txt if not yes_txt else self.txt) if not no_txt else None
163        return to_json(
164            {
165                "_class": self.__class__.__name__,
166                **({"txt": txt} if txt is not None and (yes_txt or txt) else {}),
167                "children": [kid.to_json() for kid in self.children],
168                **kwargs,
169            },
170            fn=fn,
171        )

Convert the entity to a JSON representation.

Arguments:
  • fn (str, optional): Filename to save the JSON output.
  • no_txt (bool): If True, exclude the text content.
  • yes_txt (bool): If True, include the full text content.
  • **kwargs: Additional key-value pairs to include in the JSON.
Returns:

dict: A dictionary representation of the entity.

def save(self, fn, **kwargs):
173    def save(self, fn, **kwargs):
174        """
175        Save the entity to a file in JSON format.
176
177        Args:
178            fn (str): The filename to save to.
179            **kwargs: Additional arguments to pass to to_json.
180
181        Returns:
182            The result of to_json with the given filename.
183        """
184        return self.to_json(fn=fn, **kwargs)

Save the entity to a file in JSON format.

Arguments:
  • fn (str): The filename to save to.
  • **kwargs: Additional arguments to pass to to_json.
Returns:

The result of to_json with the given filename.

def render(self, as_str=False):
186    def render(self, as_str=False):
187        """
188        Render the entity as HTML.
189
190        Args:
191            as_str (bool): If True, return the result as a string.
192
193        Returns:
194            str or HTML: The rendered HTML representation of the entity.
195        """
196        return self.to_html(as_str=as_str)

Render the entity as HTML.

Arguments:
  • as_str (bool): If True, return the result as a string.
Returns:

str or HTML: The rendered HTML representation of the entity.

@staticmethod
def from_json(json_d):
198    @staticmethod
199    def from_json(json_d):
200        """
201        Create an Entity object from a JSON dictionary.
202
203        Args:
204            json_d (dict): A dictionary containing the entity data.
205
206        Returns:
207            Entity: An instance of the appropriate Entity subclass.
208        """
209        from .imports import GLOBALS, CHILDCLASSES
210
211        classname = json_d["_class"]
212        classx = GLOBALS[classname]
213        childx = CHILDCLASSES.get(classname)
214        children = json_d.get("children", [])
215        inpd = {k: v for k, v in json_d.items() if k not in {"children", "_class"}}
216        if children and childx:
217            children = [childx.from_json(d) for d in json_d["children"]]
218        return classx(children=tuple(children), **inpd)

Create an Entity object from a JSON dictionary.

Arguments:
  • json_d (dict): A dictionary containing the entity data.
Returns:

Entity: An instance of the appropriate Entity subclass.

attrs
220    @property
221    def attrs(self):
222        """
223        Get the attributes of the entity.
224
225        Returns:
226            dict: A dictionary of the entity's attributes.
227        """
228        odx = {"num": self.num}
229        if (
230            self.__class__.__name__
231            not in {"Text", "Stanza", "MeterLine", "MeterText", "Meter"}
232            and self.txt
233        ):
234            odx["txt"] = self.txt
235        return {**odx, **self._attrs}

Get the attributes of the entity.

Returns:

dict: A dictionary of the entity's attributes.

prefix_attrs
237    @cached_property
238    def prefix_attrs(self, with_parent=True):
239        """
240        Get the attributes of the entity with a prefix.
241
242        Args:
243            with_parent (bool): If True, include parent attributes.
244
245        Returns:
246            dict: A dictionary of the entity's attributes with a prefix.
247        """
248
249        def getkey(k):
250            o = f"{self.prefix}_{k}"
251            o = DF_COLS_RENAME.get(o, o)
252            return o
253
254        odx = {getkey(k): v for k, v in self.attrs.items() if v is not None}
255        if with_parent and self.parent:
256            return {**self.parent.prefix_attrs, **odx}
257        return odx

Get the attributes of the entity with a prefix.

Arguments:
  • with_parent (bool): If True, include parent attributes.
Returns:

dict: A dictionary of the entity's attributes with a prefix.

txt
259    @cached_property
260    def txt(self):
261        """
262        Get the text content of the entity.
263
264        Returns:
265            str: The text content of the entity.
266        """
267        if self._txt:
268            txt = self._txt
269        elif self.children:
270            txt = self.child_class.sep.join(child.txt for child in self.children)
271        else:
272            txt = ""
273        return clean_text(txt)

Get the text content of the entity.

Returns:

str: The text content of the entity.

data
275    @cached_property
276    def data(self):
277        """
278        Get the data associated with the entity.
279
280        Returns:
281            list: The list of child entities.
282        """
283        return self.children

Get the data associated with the entity.

Returns:

list: The list of child entities.

l
285    @cached_property
286    def l(self):
287        """
288        Get the list of child entities.
289
290        Returns:
291            list: The list of child entities.
292        """
293        return self.children

Get the list of child entities.

Returns:

list: The list of child entities.

def clear_cached_properties(self):
295    def clear_cached_properties(self):
296        """
297        Clear cached properties to free up memory.
298        """
299        for prop in self.cached_properties_to_clear:
300            if prop in self.__dict__:
301                del self.__dict__[prop]
302            # elif hasattr(self,prop):
303            #     try:
304            #         func = getattr(self,prop)
305            #         func.clear_cache()
306            #     except AttributeError:
307            #         pass

Clear cached properties to free up memory.

def inspect(self, indent=0, maxlines=None, incl_phons=False):
309    def inspect(self, indent=0, maxlines=None, incl_phons=False):
310        """
311        Inspect the entity and its children.
312
313        Args:
314            indent (int): The indentation level for the output.
315            maxlines (int): The maximum number of lines to display.
316            incl_phons (bool): If True, include phoneme information.
317        """
318        attrstr = get_attr_str(self.attrs)
319        myself = f"{self.__class__.__name__}({attrstr})"
320        if indent:
321            myself = textwrap.indent(myself, "|" + (" " * (indent - 1)))
322        lines = [myself]
323        for child in self.children:
324            if isinstance(child, Entity) and (
325                incl_phons or not child.__class__.__name__.startswith("Phoneme")
326            ):
327                lines.append(
328                    child.inspect(indent=indent + 4, incl_phons=incl_phons).replace(
329                        "PhonemeClass", "Phoneme"
330                    )
331                )
332        # self.__class__.__name__ in {'Text', 'Stanza', 'Line'}
333        dblbreakfor = False
334        breakstr = "\n|\n" if dblbreakfor else "\n"
335        o = breakstr.join(lines)
336        if not indent:
337            if maxlines:
338                o = "\n".join(o.split("\n")[:maxlines])
339            print(o)
340        else:
341            return o

Inspect the entity and its children.

Arguments:
  • indent (int): The indentation level for the output.
  • maxlines (int): The maximum number of lines to display.
  • incl_phons (bool): If True, include phoneme information.
ld
382    @cached_property
383    def ld(self):
384        """
385        Get a list of dictionaries representing the entity and its children.
386
387        Returns:
388            list: A list of dictionaries representing the entity and its children.
389        """
390        return self.get_ld()

Get a list of dictionaries representing the entity and its children.

Returns:

list: A list of dictionaries representing the entity and its children.

child_class
392    @cached_property
393    def child_class(self):
394        """
395        Get the class of the child entities.
396
397        Returns:
398            type: The class of the child entities.
399        """
400        from .imports import GLOBALS
401
402        return GLOBALS.get(self.child_type)

Get the class of the child entities.

Returns:

type: The class of the child entities.

def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
404    def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
405        """
406        Get a list of dictionaries representing the entity and its children.
407
408        Args:
409            incl_phons (bool): If True, include phoneme information.
410            incl_sylls (bool): If True, include syllable information.
411            multiple_wordforms (bool): If True, include multiple word forms.
412
413        Returns:
414            list: A list of dictionaries representing the entity and its children.
415        """
416        if not incl_sylls and self.child_type == "Syllable":
417            return [{**self.prefix_attrs}]
418        if not incl_phons and self.child_type == "Phoneme":
419            return [{**self.prefix_attrs}]
420        good_children = [c for c in self.children if isinstance(c, Entity)]
421        # logger.debug(f'good children of {type(self)} -> {good_children}')
422        if not multiple_wordforms and self.child_type == "WordForm" and good_children:
423            good_children = good_children[:1]
424            # logger.debug(f'good children now {good_children}')
425        if good_children:
426            return [
427                {**self.prefix_attrs, **child.prefix_attrs, **grandchild_d}
428                for child in good_children
429                for grandchild_d in child.get_ld(
430                    incl_phons=incl_phons,
431                    incl_sylls=incl_sylls,
432                    multiple_wordforms=multiple_wordforms,
433                )
434            ]
435        else:
436            return [{**self.prefix_attrs}]

Get a list of dictionaries representing the entity and its children.

Arguments:
  • incl_phons (bool): If True, include phoneme information.
  • incl_sylls (bool): If True, include syllable information.
  • multiple_wordforms (bool): If True, include multiple word forms.
Returns:

list: A list of dictionaries representing the entity and its children.

def get_df(self, **kwargs):
438    def get_df(self, **kwargs):
439        """
440        Get a DataFrame representation of the entity and its children.
441
442        Args:
443            **kwargs: Additional arguments to pass to get_ld.
444
445        Returns:
446            DataFrame: A DataFrame representation of the entity and its children.
447        """
448        odf = pd.DataFrame(self.get_ld(**kwargs))
449        for c in DF_BADCOLS:
450            if c in set(odf.columns):
451                odf = odf.drop(c, axis=1)
452        for c in odf:
453            if c.endswith("_num"):
454                odf[c] = odf[c].fillna(0).apply(int)
455            else:
456                odf[c] = odf[c].fillna("")
457        odf = setindex(odf, DF_INDEX)
458
459        def unbool(x):
460            if x is True:
461                return 1
462            if x is False:
463                return 0
464            if x is None:
465                return 0
466            return x
467
468        odf = odf.applymap(unbool)
469        return odf

Get a DataFrame representation of the entity and its children.

Arguments:
  • **kwargs: Additional arguments to pass to get_ld.
Returns:

DataFrame: A DataFrame representation of the entity and its children.

df
471    @cached_property
472    def df(self):
473        """
474        Get a DataFrame representation of the entity and its children.
475
476        Returns:
477            DataFrame: A DataFrame representation of the entity and its children.
478        """
479        return self.get_df()

Get a DataFrame representation of the entity and its children.

Returns:

DataFrame: A DataFrame representation of the entity and its children.

def get_parent(self, parent_type=None):
511    def get_parent(self, parent_type=None):
512        """
513        Get the parent entity of a specific type.
514
515        Args:
516            parent_type (str): The type of parent entity to find.
517
518        Returns:
519            Entity: The parent entity of the specified type, or None if not found.
520        """
521        logger.trace(self.__class__.__name__)
522        if not hasattr(self, "parent") or not self.parent:
523            return
524        if self.parent.__class__.__name__ == parent_type:
525            return self.parent
526        return self.parent.get_parent(parent_type)

Get the parent entity of a specific type.

Arguments:
  • parent_type (str): The type of parent entity to find.
Returns:

Entity: The parent entity of the specified type, or None if not found.

stanzas
528    @cached_property
529    def stanzas(self):
530        """
531        Get the list of stanza entities.
532
533        Returns:
534            StanzaList: A list of stanza entities.
535        """
536        from .texts import StanzaList
537
538        if self.is_text:
539            o = self.children
540        elif self.is_stanza:
541            o = [self]
542        else:
543            o = []
544        return StanzaList(o)

Get the list of stanza entities.

Returns:

StanzaList: A list of stanza entities.

line_r
546    @property
547    def line_r(self):
548        """
549        Get a random line entity.
550
551        Returns:
552            Line: A random line entity, or None if no lines exist.
553        """
554        return random.choice(self.lines) if self.lines else None

Get a random line entity.

Returns:

Line: A random line entity, or None if no lines exist.

word_r
556    @property
557    def word_r(self):
558        """
559        Get a random word entity.
560
561        Returns:
562            WordToken: A random word entity, or None if no words exist.
563        """
564        return random.choice(self.words) if self.words else None

Get a random word entity.

Returns:

WordToken: A random word entity, or None if no words exist.

lines
566    @cached_property
567    def lines(self):
568        """
569        Get the list of line entities.
570
571        Returns:
572            LineList: A list of line entities.
573        """
574        from .texts import LineList
575
576        if self.is_stanza:
577            o = self.children
578        elif self.is_line:
579            o = [self]
580        else:
581            o = [line for stanza in self.stanzas for line in stanza.children]
582        return LineList(o)

Get the list of line entities.

Returns:

LineList: A list of line entities.

wordtokens
584    @cached_property
585    def wordtokens(self):
586        """
587        Get the list of word token entities.
588
589        Returns:
590            WordTokenList: A list of word token entities.
591        """
592        from .words import WordTokenList
593
594        if self.is_line:
595            o = self.children
596        elif self.is_wordtoken:
597            o = [self]
598        else:
599            o = [wt for line in self.lines for wt in line.children]
600        return WordTokenList(o)

Get the list of word token entities.

Returns:

WordTokenList: A list of word token entities.

words
602    @property
603    def words(self):
604        """
605        Get the list of word token entities.
606
607        Returns:
608            WordTokenList: A list of word token entities.
609        """
610        return self.wordtokens

Get the list of word token entities.

Returns:

WordTokenList: A list of word token entities.

wordtypes
612    @cached_property
613    def wordtypes(self):
614        """
615        Get the list of word type entities.
616
617        Returns:
618            WordTypeList: A list of word type entities.
619        """
620        from .words import WordTypeList
621
622        if self.is_wordtoken:
623            o = self.children
624        elif self.is_wordtype:
625            o = [self]
626        else:
627            o = [wtype for token in self.wordtokens for wtype in token.children]
628        return WordTypeList(o)

Get the list of word type entities.

Returns:

WordTypeList: A list of word type entities.

wordforms
630    @cached_property
631    def wordforms(self):
632        """
633        Get the list of word form entities.
634
635        Returns:
636            WordFormList: A list of word form entities.
637        """
638        from .words import WordFormList
639
640        if self.is_wordtype:
641            o = self.children[:1]
642        elif self.is_wordtype:
643            o = [self]
644        else:
645            o = [wtype.children[0] for wtype in self.wordtypes if wtype.children]
646        return WordFormList(o)

Get the list of word form entities (only the first word form of each word type).

Returns:

WordFormList: A list of word form entities.

wordforms_nopunc
648    @cached_property
649    def wordforms_nopunc(self):
650        """
651        Get the list of word form entities, excluding punctuation.
652
653        Returns:
654            list: A list of word form entities, excluding punctuation.
655        """
656        return [wf for wf in self.wordforms if not wf.parent.is_punc]

Get the list of word form entities, excluding punctuation.

Returns:

list: A list of word form entities, excluding punctuation.

wordforms_all
658    @cached_property
659    def wordforms_all(self):
660        """
661        Get the list of all word form entities.
662
663        Returns:
664            list: A list of all word form entities.
665        """
666        if self.is_wordtype:
667            o = self.children
668        if self.is_wordform:
669            o = [self]
670        else:
671            o = [wtype.children for wtype in self.wordtypes]
672        return o

Get the list of all word form entities.

Returns:

list: A list of all word form entities.

syllables
674    @cached_property
675    def syllables(self):
676        """
677        Get the list of syllable entities.
678
679        Returns:
680            SyllableList: A list of syllable entities.
681        """
682        from .words import SyllableList
683
684        if self.is_wordform:
685            o = self.children
686        if self.is_syll:
687            o = [self]
688        else:
689            o = [syll for wf in self.wordforms for syll in wf.children]
690        return SyllableList(o)

Get the list of syllable entities.

Returns:

SyllableList: A list of syllable entities.

phonemes
692    @cached_property
693    def phonemes(self):
694        """
695        Get the list of phoneme entities.
696
697        Returns:
698            PhonemeList: A list of phoneme entities.
699        """
700        from .words import PhonemeList
701
702        if self.is_syll:
703            o = self.children
704        if self.is_phon:
705            o = [self]
706        else:
707            o = [phon for syll in self.syllables for phon in syll.children]
708        return PhonemeList(o)

Get the list of phoneme entities.

Returns:

PhonemeList: A list of phoneme entities.

text
710    @cached_property
711    def text(self):
712        """
713        Get the parent text entity.
714
715        Returns:
716            Text: The parent text entity, or None if not found.
717        """
718        return self.get_parent("Text")

Get the parent text entity.

Returns:

Text: The parent text entity, or None if not found.

stanza
720    @cached_property
721    def stanza(self):
722        """
723        Get the parent stanza entity.
724
725        Returns:
726            Stanza: The parent stanza entity, or None if not found.
727        """
728        return self.get_parent("Stanza")

Get the parent stanza entity.

Returns:

Stanza: The parent stanza entity, or None if not found.

line
730    @cached_property
731    def line(self):
732        """
733        Get the parent line entity.
734
735        Returns:
736            Line: The parent line entity, or None if not found.
737        """
738        return self.get_parent("Line")

Get the parent line entity.

Returns:

Line: The parent line entity, or None if not found.

wordtoken
740    @cached_property
741    def wordtoken(self):
742        """
743        Get the parent word token entity.
744
745        Returns:
746            WordToken: The parent word token entity, or None if not found.
747        """
748        return self.get_parent("WordToken")

Get the parent word token entity.

Returns:

WordToken: The parent word token entity, or None if not found.

wordtype
750    @cached_property
751    def wordtype(self):
752        """
753        Get the parent word type entity.
754
755        Returns:
756            WordType: The parent word type entity, or None if not found.
757        """
758        return self.get_parent("WordType")

Get the parent word type entity.

Returns:

WordType: The parent word type entity, or None if not found.

wordform
760    @cached_property
761    def wordform(self):
762        """
763        Get the parent word form entity.
764
765        Returns:
766            WordForm: The parent word form entity, or None if not found.
767        """
768        return self.get_parent("WordForm")

Get the parent word form entity.

Returns:

WordForm: The parent word form entity, or None if not found.

syllable
770    @cached_property
771    def syllable(self):
772        """
773        Get the parent syllable entity.
774
775        Returns:
776            Syllable: The parent syllable entity, or None if not found.
777        """
778        return self.get_parent("Syllable")

Get the parent syllable entity.

Returns:

Syllable: The parent syllable entity, or None if not found.

i
780    @cached_property
781    def i(self):
782        """
783        Get the index of the entity in its parent's children list.
784
785        Returns:
786            int: The index of the entity, or None if not found.
787        """
788        if self.parent is None:
789            return None
790        if not self.parent.children:
791            return None
792        try:
793            return self.parent.children.index(self)
794        except IndexError:
795            return None

Get the index of the entity in its parent's children list.

Returns:

int: The index of the entity, or None if not found.

num
797    @cached_property
798    def num(self):
799        """
800        Get the 1-based index of the entity in its parent's children list.
801
802        Returns:
803            int: The 1-based index of the entity, or None if not found.
804        """
805        return self.i + 1 if self.i is not None else None

Get the 1-based index of the entity in its parent's children list.

Returns:

int: The 1-based index of the entity, or None if not found.

next
807    @cached_property
808    def next(self):
809        """
810        Get the next sibling entity.
811
812        Returns:
813            Entity: The next sibling entity, or None if not found.
814        """
815        if self.i is None:
816            return None
817        try:
818            return self.parent.children[self.i + 1]
819        except IndexError:
820            return None

Get the next sibling entity.

Returns:

Entity: The next sibling entity, or None if not found.

prev
822    @cached_property
823    def prev(self):
824        """
825        Get the previous sibling entity.
826
827        Returns:
828            Entity: The previous sibling entity, or None if not found.
829        """
830        if self.i is None:
831            return None
832        i = self.i
833        if i - 1 < 0:
834            return None
835        try:
836            return self.parent.children[i - 1]
837        except IndexError:
838            return None

Get the previous sibling entity.

Returns:

Entity: The previous sibling entity, or None if not found.

is_text
840    @cached_property
841    def is_text(self):
842        """
843        Check if the entity is a text entity.
844
845        Returns:
846            bool: True if the entity is a text entity, False otherwise.
847        """
848        return self.__class__.__name__ == "Text"

Check if the entity is a text entity.

Returns:

bool: True if the entity is a text entity, False otherwise.

is_stanza
850    @cached_property
851    def is_stanza(self):
852        """
853        Check if the entity is a stanza entity.
854
855        Returns:
856            bool: True if the entity is a stanza entity, False otherwise.
857        """
858        return self.__class__.__name__ == "Stanza"

Check if the entity is a stanza entity.

Returns:

bool: True if the entity is a stanza entity, False otherwise.

is_line
860    @cached_property
861    def is_line(self):
862        """
863        Check if the entity is a line entity.
864
865        Returns:
866            bool: True if the entity is a line entity, False otherwise.
867        """
868        return self.__class__.__name__ == "Line"

Check if the entity is a line entity.

Returns:

bool: True if the entity is a line entity, False otherwise.

is_wordtoken
870    @cached_property
871    def is_wordtoken(self):
872        """
873        Check if the entity is a word token entity.
874
875        Returns:
876            bool: True if the entity is a word token entity, False otherwise.
877        """
878        return self.__class__.__name__ == "WordToken"

Check if the entity is a word token entity.

Returns:

bool: True if the entity is a word token entity, False otherwise.

is_wordtype
880    @cached_property
881    def is_wordtype(self):
882        """
883        Check if the entity is a word type entity.
884
885        Returns:
886            bool: True if the entity is a word type entity, False otherwise.
887        """
888        return self.__class__.__name__ == "WordType"

Check if the entity is a word type entity.

Returns:

bool: True if the entity is a word type entity, False otherwise.

is_wordform
890    @cached_property
891    def is_wordform(self):
892        """
893        Check if the entity is a word form entity.
894
895        Returns:
896            bool: True if the entity is a word form entity, False otherwise.
897        """
898        return self.__class__.__name__ == "WordForm"

Check if the entity is a word form entity.

Returns:

bool: True if the entity is a word form entity, False otherwise.

is_syll
900    @cached_property
901    def is_syll(self):
902        """
903        Check if the entity is a syllable entity.
904
905        Returns:
906            bool: True if the entity is a syllable entity, False otherwise.
907        """
908        return self.__class__.__name__ == "Syllable"

Check if the entity is a syllable entity.

Returns:

bool: True if the entity is a syllable entity, False otherwise.

is_phon
910    @cached_property
911    def is_phon(self):
912        """
913        Check if the entity is a phoneme entity.
914
915        Returns:
916            bool: True if the entity is a phoneme entity, False otherwise.
917        """
918        return self.__class__.__name__ == "PhonemeClass"

Check if the entity is a phoneme entity.

Returns:

bool: True if the entity is a phoneme entity, False otherwise.

def children_from_cache(self):
920    def children_from_cache(self):
921        """
922        Get the children of the entity from the cache.
923
924        Returns:
925            list: The list of child entities, or None if not found in the cache.
926        """
927        if caching_is_enabled():
928            res = self.from_cache()
929            print("FOUND", res)
930            return None if res is None else res.children

Get the children of the entity from the cache.

Returns:

list: The list of child entities, or None if not found in the cache.

def get_key(self, key):
932    def get_key(self, key):
933        """
934        Get a key for caching purposes.
935
936        Args:
937            key: The key object.
938
939        Returns:
940            str: The hashed key.
941        """
942        if hasattr(key, "to_hash"):
943            key = key.to_hash()
944        elif key:
945            key = hashstr(key)
946        return key

Get a key for caching purposes.

Arguments:
  • key: The key object.
Returns:

str: The hashed key.

def from_cache(self, obj=None, key=None, as_dict=False):
948    def from_cache(self, obj=None, key=None, as_dict=False):
949        """
950        Get an object from the cache.
951
952        Args:
953            obj: The object to cache.
954            key: The key for the cache.
955            as_dict (bool): If True, return the cached data as a dictionary.
956
957        Returns:
958            Any: The cached object, or None if not found.
959        """
960        if obj is None:
961            obj = self
962        key = self.get_key(obj) if not key else key
963        if key and self.use_cache != False:
964            cache = self.get_cache()
965            if key in cache:
966                dat = cache[key]
967                if dat:
968                    return from_json(dat) if not as_dict else dat

Get an object from the cache.

Arguments:
  • obj: The object to look up in the cache (defaults to this entity).
  • key: The key for the cache.
  • as_dict (bool): If True, return the cached data as a dictionary.
Returns:

Any: The cached object, or None if not found.

def get_cache(self):
970    def get_cache(self):
971        """
972        Get the cache object.
973
974        Returns:
975            SimpleCache: The cache object.
976        """
977        return SimpleCache()

Get the cache object.

Returns:

SimpleCache: The cache object.

def cache(self, key_obj=None, val_obj=None, key=None, force=False):
 979    def cache(
 980        self, key_obj=None, val_obj=None, key=None, force=False
 981    ):
 982        """
 983        Cache an object.
 984
 985        Args:
 986            key_obj: The object to use as the cache key.
 987            val_obj: The object to cache.
 988            key: An optional key for the cache.
 989            force (bool): If True, force the cache to be updated.
 990        """
 991        if key_obj is None:
 992            key_obj = self
 993        if val_obj is None:
 994            val_obj = key_obj
 995        logger.trace(f"key_obj = {key_obj}")
 996        logger.trace(f"val_obj = {val_obj}")
 997        key = self.get_key(key_obj) if not key else key
 998        cache = self.get_cache()
 999        if key and (force or not key in cache):
1000            with logmap(f"saving object under key {key[:8]}"):
1001                with logmap("exporting to json", level="trace"):
1002                    data = val_obj.to_json()
1003                with logmap("uploading json to cache", level="trace"):
1004                    cache[key] = data

Cache an object.

Arguments:
  • key_obj: The object to use as the cache key.
  • val_obj: The object to cache.
  • key: An optional key for the cache.
  • force (bool): If True, force the cache to be updated.
Inherited Members
collections.UserList
append
insert
pop
remove
clear
copy
count
index
reverse
sort
extend
class EntityList(Entity):
class EntityList(Entity):
    """
    A plain container of Entity objects.

    Its constructor stores the children, the parent and any extra
    attributes directly on the instance.
    """

    def __init__(self, children=[], parent=None, **kwargs):
        """
        Set up the list with its children, parent and extra attributes.

        Args:
            children (list): Entities to hold; copied into a new list.
            parent (Entity): The entity this list belongs to.
            **kwargs: Extra attributes, kept in ``_attrs`` and also set
                directly on the instance.
        """
        self.parent = parent
        # Copy so the caller's sequence is never aliased by this list.
        self.children = list(children)
        self._attrs = kwargs
        self._txt = None
        for attr_name, attr_value in self._attrs.items():
            setattr(self, attr_name, attr_value)

    @cached_property
    def txt(self):
        """
        Text content of the entity list.

        Returns:
            None: Always None for EntityList objects.
        """
        return None

A list of Entity objects.

EntityList(children=[], parent=None, **kwargs)
1012    def __init__(self, children=[], parent=None, **kwargs):
1013        """
1014        Initialize an EntityList object.
1015
1016        Args:
1017            children (list): List of child entities.
1018            parent (Entity): The parent entity.
1019            **kwargs: Additional attributes to set on the entity.
1020        """
1021        self.parent = parent
1022        self.children = [x for x in children]
1023        self._attrs = kwargs
1024        self._txt = None
1025        for k, v in self._attrs.items():
1026            setattr(self, k, v)

Initialize an EntityList object.

Arguments:
  • children (list): List of child entities.
  • parent (Entity): The parent entity.
  • **kwargs: Additional attributes to set on the entity.
parent
children
txt
1028    @cached_property
1029    def txt(self):
1030        """
1031        Get the text content of the entity list.
1032
1033        Returns:
1034            None: Always returns None for EntityList objects.
1035        """
1036        return None

Get the text content of the entity list.

Returns:

None: Always returns None for EntityList objects.