prosodic.ents
from typing import Any
from .imports import *


class Entity(UserList):
    """
    Root Entity class representing a hierarchical structure in prosodic analysis.

    This class serves as the base for various prosodic entities such as texts,
    stanzas, lines, words, syllables, and phonemes. It provides common
    functionality for managing hierarchical relationships, attributes, and
    data representation.

    Note:
        ``__getattr__`` below returns ``None`` for any attribute that is not
        found through normal lookup, so ``hasattr(entity, name)`` is always
        True. Code in this class therefore checks ``callable(...)`` or
        ``is None`` rather than relying on ``hasattr``.

    Attributes:
        child_type (str): The type of child entities this entity can contain.
        is_parseable (bool): Whether this entity can be parsed.
        index_name (str): The name used for indexing this entity type.
        prefix (str): A prefix used for attribute naming.
        list_type (type): The type of list used for storing children.
        cached_properties_to_clear (list): Properties to clear from cache.
        use_cache (bool): Whether to use caching for this entity.
        sep (str): Separator used when joining child texts.
    """

    child_type = "Text"
    is_parseable = False
    index_name = None
    prefix = "ent"
    list_type = None
    cached_properties_to_clear = []
    use_cache = False
    sep = ""

    def __init__(self, txt: str = "", children=None, parent=None, **kwargs):
        """
        Initialize an Entity object.

        Args:
            txt (str): The text content of the entity.
            children (iterable): Child entities; ``None`` means no children.
                Items that are not ``Entity`` instances are skipped with a
                warning.
            parent (Entity): The parent entity.
            **kwargs: Additional attributes to set on the entity.
        """
        from .imports import GLOBALS

        # NOTE: UserList.__init__ is not called here; list behaviour is served
        # by the `data` property below, which returns `self.children`.
        self.parent = parent
        newchildren = []
        for child in children if children is not None else ():
            if not isinstance(child, Entity):
                logger.warning(f"{child} is not an Entity")
                continue
            newchildren.append(child)
            # Every accepted child is re-parented to this entity.
            child.parent = self
        if self.list_type is None:
            self.list_type = "EntityList"
        # `list_type` is a class *name*; the class itself is looked up in GLOBALS.
        self.children = GLOBALS[self.list_type](newchildren)
        self._attrs = kwargs
        self._txt = txt
        self._mtr = None
        for k, v in self._attrs.items():
            setattr(self, k, v)

    def __iter__(self):
        """
        Iterate over the children of this entity.

        Yields:
            Entity: The next child entity.
        """
        yield from self.children

    def to_hash(self):
        """
        Generate a hash representation of the entity.

        Returns:
            str: A hash string built from the entity's text, its sorted
            keyword attributes, and its class name.
        """
        return hashstr(
            self.txt, tuple(sorted(self._attrs.items())), self.__class__.__name__
        )

    @cached_property
    def html(self):
        """
        Get the HTML representation of the entity.

        Returns:
            str: Result of ``to_html()`` if the entity defines it, else None.
        """
        # `hasattr` cannot be used here: __getattr__ returns None for unknown
        # names, so a missing `to_html` would look present and then fail with
        # "'NoneType' object is not callable".
        to_html = getattr(self, "to_html", None)
        if callable(to_html):
            return to_html()
        return None

    @cached_property
    def key(self):
        """
        Generate a key for the entity.

        Returns:
            str: A string key built from the entity's class name and its
            non-None attributes, with ``txt`` taken from the raw ``_txt``.
        """
        attrs = {
            **{k: v for k, v in self.attrs.items() if v is not None},
            "txt": self._txt,
        }
        return f"{self.__class__.__name__}({get_attr_str(attrs)})"

    @cached_property
    def hash(self):
        """
        Get a hash value for the entity.

        Returns:
            str: The hash string of ``self.key``.
        """
        return hashstr(self.key)

    def __hash__(self):
        """
        Get the hash value for use in hash-based collections.

        Returns:
            int: The hash value of the entity.
        """
        return hash(self.hash)

    def __eq__(self, other):
        """
        Check if this entity is equal to another.

        Args:
            other: The other object to compare with.

        Returns:
            bool: True if the objects are the same instance, False otherwise.
        """
        return self is other

    def __bool__(self):
        """
        Check if the entity is considered True in a boolean context.

        Returns:
            bool: Always True, even when the entity has no children. Use
            ``len()`` to test for emptiness.
        """
        return True

    def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
        """
        Convert the entity to a JSON representation.

        Args:
            fn (str, optional): Filename to save the JSON output.
            no_txt (bool): If True, exclude the text content.
            yes_txt (bool): If True, include the full (computed) text content.
            **kwargs: Additional key-value pairs to include in the JSON.

        Returns:
            The result of the module-level ``to_json`` helper.
        """
        if no_txt:
            txt = None
        else:
            txt = self.txt if yes_txt else self._txt
        # Empty text is only written out when explicitly requested via yes_txt.
        include_txt = txt is not None and (yes_txt or txt)
        return to_json(
            {
                "_class": self.__class__.__name__,
                **({"txt": txt} if include_txt else {}),
                "children": [kid.to_json() for kid in self.children],
                **kwargs,
            },
            fn=fn,
        )

    def save(self, fn, **kwargs):
        """
        Save the entity to a file in JSON format.

        Args:
            fn (str): The filename to save to.
            **kwargs: Additional arguments to pass to to_json.

        Returns:
            The result of to_json with the given filename.
        """
        return self.to_json(fn=fn, **kwargs)

    def render(self, as_str=False):
        """
        Render the entity as HTML.

        Args:
            as_str (bool): Passed through to ``to_html``.

        Returns:
            Whatever ``self.to_html(as_str=as_str)`` returns.
        """
        return self.to_html(as_str=as_str)

    @staticmethod
    def from_json(json_d):
        """
        Create an Entity object from a JSON dictionary.

        Args:
            json_d (dict): A dictionary containing the entity data; must have
                a ``"_class"`` key naming the class to instantiate.

        Returns:
            Entity: An instance of the class named by ``json_d["_class"]``.
        """
        from .imports import GLOBALS, CHILDCLASSES

        classname = json_d["_class"]
        classx = GLOBALS[classname]
        childx = CHILDCLASSES.get(classname)
        children = json_d.get("children", [])
        inpd = {k: v for k, v in json_d.items() if k not in {"children", "_class"}}
        # Children are only rebuilt when a child class is registered for this class.
        if children and childx:
            children = [childx.from_json(d) for d in children]
        return classx(children=tuple(children), **inpd)

    @property
    def attrs(self):
        """
        Get the attributes of the entity.

        Returns:
            dict: ``num``, optionally ``txt``, and the keyword attributes
            passed at construction (which take precedence).
        """
        odx = {"num": self.num}
        no_txt_classes = {"Text", "Stanza", "MeterLine", "MeterText", "Meter"}
        if self.__class__.__name__ not in no_txt_classes and self.txt:
            odx["txt"] = self.txt
        return {**odx, **self._attrs}

    @cached_property
    def prefix_attrs(self, with_parent=True):
        """
        Get the attributes of the entity with a prefix.

        Args:
            with_parent (bool): If True, include parent attributes. Because
                this is accessed as a cached property, the default is always
                used.

        Returns:
            dict: The entity's non-None attributes keyed as
            ``"{prefix}_{name}"`` (after ``DF_COLS_RENAME``), merged over the
            parent's prefixed attributes.
        """

        def getkey(k):
            o = f"{self.prefix}_{k}"
            return DF_COLS_RENAME.get(o, o)

        odx = {getkey(k): v for k, v in self.attrs.items() if v is not None}
        if with_parent and self.parent:
            return {**self.parent.prefix_attrs, **odx}
        return odx

    @cached_property
    def txt(self):
        """
        Get the text content of the entity.

        Returns:
            str: The cleaned explicit text if set, otherwise the children's
            texts joined by the child class's separator.
        """
        if self._txt:
            txt = self._txt
        elif self.children:
            txt = self.child_class.sep.join(child.txt for child in self.children)
        else:
            txt = ""
        return clean_text(txt)

    @cached_property
    def data(self):
        """
        Get the data associated with the entity.

        Returns:
            list: The list of child entities.
        """
        return self.children

    @cached_property
    def l(self):
        """
        Get the list of child entities.

        Returns:
            list: The list of child entities.
        """
        return self.children

    def clear_cached_properties(self):
        """
        Clear the cached properties named in ``cached_properties_to_clear``.
        """
        for prop in self.cached_properties_to_clear:
            # Cached values live in the instance __dict__; dropping them
            # forces recomputation on next access.
            self.__dict__.pop(prop, None)

    def inspect(self, indent=0, maxlines=None, incl_phons=False):
        """
        Inspect the entity and its children.

        Args:
            indent (int): The indentation level for the output.
            maxlines (int): The maximum number of lines to display
                (top-level call only).
            incl_phons (bool): If True, include phoneme information.

        Returns:
            str or None: The rendered tree for nested calls (``indent > 0``);
            the top-level call prints the tree and returns None.
        """
        attrstr = get_attr_str(self.attrs)
        myself = f"{self.__class__.__name__}({attrstr})"
        if indent:
            myself = textwrap.indent(myself, "|" + (" " * (indent - 1)))
        lines = [myself]
        for child in self.children:
            if isinstance(child, Entity) and (
                incl_phons or not child.__class__.__name__.startswith("Phoneme")
            ):
                lines.append(
                    child.inspect(indent=indent + 4, incl_phons=incl_phons).replace(
                        "PhonemeClass", "Phoneme"
                    )
                )
        o = "\n".join(lines)
        if indent:
            return o
        if maxlines:
            o = "\n".join(o.split("\n")[:maxlines])
        print(o)

    def _repr_html_(self, df=None):
        """
        Get the HTML representation of the entity.

        Args:
            df (DataFrame): An optional DataFrame to use for rendering;
                defaults to ``self.df``.

        Returns:
            str: The HTML representation of the DataFrame with None/NaN
            cells blanked.
        """

        def blank(x):
            # A set-membership test would raise on unhashable cells and miss
            # NaN objects other than `np.nan` itself; NaN is the only float
            # that is not equal to itself.
            if x is None or (isinstance(x, float) and x != x):
                return ""
            return x

        return (self.df if df is None else df).applymap(blank)._repr_html_()

    def __repr__(self, attrs=None, bad_keys=None):
        """
        Get a string representation of the entity.

        Args:
            attrs (dict): An optional dictionary of attributes to use.
            bad_keys (list): An optional list of keys to exclude.

        Returns:
            str: A string representation of the entity.
        """
        if attrs is None:
            attrs = self.attrs
            if attrs is None:
                attrs = self._attrs
        d = dict(attrs.items())
        return f"{self.__class__.__name__}({get_attr_str(d, bad_keys=bad_keys)})"

    @cached_property
    def ld(self):
        """
        Get a list of dictionaries representing the entity and its children.

        Returns:
            list: The result of ``get_ld()`` with default arguments.
        """
        return self.get_ld()

    @cached_property
    def child_class(self):
        """
        Get the class of the child entities.

        Returns:
            type: The class registered in GLOBALS under ``child_type``, or
            None if there is none.
        """
        from .imports import GLOBALS

        return GLOBALS.get(self.child_type)

    def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
        """
        Get a list of dictionaries representing the entity and its children.

        Args:
            incl_phons (bool): If True, descend into phonemes.
            incl_sylls (bool): If True, descend into syllables.
            multiple_wordforms (bool): If False, keep only the first word form.

        Returns:
            list: One dict per leaf reached, each merging the prefixed
            attributes of every entity on the path to that leaf.
        """
        if not incl_sylls and self.child_type == "Syllable":
            return [{**self.prefix_attrs}]
        if not incl_phons and self.child_type == "Phoneme":
            return [{**self.prefix_attrs}]
        good_children = [c for c in self.children if isinstance(c, Entity)]
        if not multiple_wordforms and self.child_type == "WordForm" and good_children:
            good_children = good_children[:1]
        if not good_children:
            return [{**self.prefix_attrs}]
        return [
            {**self.prefix_attrs, **child.prefix_attrs, **grandchild_d}
            for child in good_children
            for grandchild_d in child.get_ld(
                incl_phons=incl_phons,
                incl_sylls=incl_sylls,
                multiple_wordforms=multiple_wordforms,
            )
        ]

    def get_df(self, **kwargs):
        """
        Get a DataFrame representation of the entity and its children.

        Args:
            **kwargs: Additional arguments to pass to get_ld.

        Returns:
            DataFrame: The rows of ``get_ld`` with unwanted columns dropped,
            missing values filled, the index set, and booleans/None mapped
            to integers.
        """
        odf = pd.DataFrame(self.get_ld(**kwargs))
        present = set(odf.columns)
        badcols = [c for c in DF_BADCOLS if c in present]
        if badcols:
            odf = odf.drop(columns=badcols)
        for c in odf:
            if c.endswith("_num"):
                odf[c] = odf[c].fillna(0).apply(int)
            else:
                odf[c] = odf[c].fillna("")
        odf = setindex(odf, DF_INDEX)

        def unbool(x):
            if x is True:
                return 1
            if x is False or x is None:
                return 0
            return x

        return odf.applymap(unbool)

    @cached_property
    def df(self):
        """
        Get a DataFrame representation of the entity and its children.

        Returns:
            DataFrame: The result of ``get_df()`` with default arguments.
        """
        return self.get_df()

    def __getattr__(self, attr):
        """
        Resolve numbered shortcuts such as ``line3`` or ``word12``.

        Called only when normal attribute lookup fails. A name made of a
        known prefix followed by digits returns the (1-based) item from the
        corresponding list property.

        Args:
            attr (str): The name of the attribute.

        Returns:
            Any: The requested item, or None if the number is out of range
            or the name is not a numbered shortcut.
        """
        objs = {
            "stanza": "stanzas",
            "line": "lines",
            "word": "wordtokens",
            "wordtoken": "wordtokens",
            "wordtype": "wordtypes",
            "wordform": "wordforms",
            "syllable": "syllables",
            "phoneme": "phonemes",
        }
        if attr[-1].isdigit():
            for pref, lname in objs.items():
                suffix = attr[len(pref) :]
                if attr.startswith(pref) and suffix.isdigit():
                    try:
                        return getattr(self, lname)[int(suffix) - 1]
                    except IndexError:
                        logger.warning(f"no {pref} at that number")
                        return None
        # Unknown attributes resolve to None rather than raising.
        return None

    def get_parent(self, parent_type=None):
        """
        Get the nearest ancestor entity of a specific type.

        Args:
            parent_type (str): The class name of the ancestor to find.

        Returns:
            Entity: The matching ancestor, or None if not found.
        """
        logger.trace(self.__class__.__name__)
        if not hasattr(self, "parent") or not self.parent:
            return None
        if self.parent.__class__.__name__ == parent_type:
            return self.parent
        return self.parent.get_parent(parent_type)

    @cached_property
    def stanzas(self):
        """
        Get the list of stanza entities.

        Returns:
            StanzaList: A list of stanza entities.
        """
        from .texts import StanzaList

        if self.is_text:
            o = self.children
        elif self.is_stanza:
            o = [self]
        else:
            o = []
        return StanzaList(o)

    @property
    def line_r(self):
        """
        Get a random line entity.

        Returns:
            Line: A random line entity, or None if no lines exist.
        """
        lines = self.lines
        # Entity.__bool__ is always True, so emptiness must be tested by length.
        return random.choice(lines) if len(lines) else None

    @property
    def word_r(self):
        """
        Get a random word entity.

        Returns:
            WordToken: A random word entity, or None if no words exist.
        """
        words = self.words
        # Entity.__bool__ is always True, so emptiness must be tested by length.
        return random.choice(words) if len(words) else None

    @cached_property
    def lines(self):
        """
        Get the list of line entities.

        Returns:
            LineList: A list of line entities.
        """
        from .texts import LineList

        if self.is_stanza:
            o = self.children
        elif self.is_line:
            o = [self]
        else:
            o = [line for stanza in self.stanzas for line in stanza.children]
        return LineList(o)

    @cached_property
    def wordtokens(self):
        """
        Get the list of word token entities.

        Returns:
            WordTokenList: A list of word token entities.
        """
        from .words import WordTokenList

        if self.is_line:
            o = self.children
        elif self.is_wordtoken:
            o = [self]
        else:
            o = [wt for line in self.lines for wt in line.children]
        return WordTokenList(o)

    @property
    def words(self):
        """
        Get the list of word token entities (alias of ``wordtokens``).

        Returns:
            WordTokenList: A list of word token entities.
        """
        return self.wordtokens

    @cached_property
    def wordtypes(self):
        """
        Get the list of word type entities.

        Returns:
            WordTypeList: A list of word type entities.
        """
        from .words import WordTypeList

        if self.is_wordtoken:
            o = self.children
        elif self.is_wordtype:
            o = [self]
        else:
            o = [wtype for token in self.wordtokens for wtype in token.children]
        return WordTypeList(o)

    @cached_property
    def wordforms(self):
        """
        Get the list of word form entities (first form per word type).

        Returns:
            WordFormList: A list of word form entities.
        """
        from .words import WordFormList

        if self.is_wordtype:
            o = self.children[:1]
        elif self.is_wordform:
            o = [self]
        else:
            o = [wtype.children[0] for wtype in self.wordtypes if wtype.children]
        return WordFormList(o)

    @cached_property
    def wordforms_nopunc(self):
        """
        Get the list of word form entities, excluding punctuation.

        Returns:
            list: Word forms whose parent is not flagged ``is_punc``.
        """
        return [wf for wf in self.wordforms if not wf.parent.is_punc]

    @cached_property
    def wordforms_all(self):
        """
        Get all word form entities (every form, not just the first).

        Returns:
            list: The children of a word type, ``[self]`` for a word form,
            otherwise one children-list per word type.
        """
        if self.is_wordtype:
            o = self.children
        elif self.is_wordform:
            o = [self]
        else:
            o = [wtype.children for wtype in self.wordtypes]
        return o

    @cached_property
    def syllables(self):
        """
        Get the list of syllable entities.

        Returns:
            SyllableList: A list of syllable entities.
        """
        from .words import SyllableList

        if self.is_wordform:
            o = self.children
        elif self.is_syll:
            o = [self]
        else:
            o = [syll for wf in self.wordforms for syll in wf.children]
        return SyllableList(o)

    @cached_property
    def phonemes(self):
        """
        Get the list of phoneme entities.

        Returns:
            PhonemeList: A list of phoneme entities.
        """
        from .words import PhonemeList

        if self.is_syll:
            o = self.children
        elif self.is_phon:
            o = [self]
        else:
            o = [phon for syll in self.syllables for phon in syll.children]
        return PhonemeList(o)

    @cached_property
    def text(self):
        """
        Get the ancestor text entity.

        Returns:
            Text: The ancestor of class ``Text``, or None if not found.
        """
        return self.get_parent("Text")

    @cached_property
    def stanza(self):
        """
        Get the ancestor stanza entity.

        Returns:
            Stanza: The ancestor of class ``Stanza``, or None if not found.
        """
        return self.get_parent("Stanza")

    @cached_property
    def line(self):
        """
        Get the ancestor line entity.

        Returns:
            Line: The ancestor of class ``Line``, or None if not found.
        """
        return self.get_parent("Line")

    @cached_property
    def wordtoken(self):
        """
        Get the ancestor word token entity.

        Returns:
            WordToken: The ancestor of class ``WordToken``, or None if not found.
        """
        return self.get_parent("WordToken")

    @cached_property
    def wordtype(self):
        """
        Get the ancestor word type entity.

        Returns:
            WordType: The ancestor of class ``WordType``, or None if not found.
        """
        return self.get_parent("WordType")

    @cached_property
    def wordform(self):
        """
        Get the ancestor word form entity.

        Returns:
            WordForm: The ancestor of class ``WordForm``, or None if not found.
        """
        return self.get_parent("WordForm")

    @cached_property
    def syllable(self):
        """
        Get the ancestor syllable entity.

        Returns:
            Syllable: The ancestor of class ``Syllable``, or None if not found.
        """
        return self.get_parent("Syllable")

    @cached_property
    def i(self):
        """
        Get the index of the entity in its parent's children list.

        Returns:
            int: The 0-based index of the entity, or None if not found.
        """
        if self.parent is None:
            return None
        if not self.parent.children:
            return None
        try:
            return self.parent.children.index(self)
        except (ValueError, IndexError):
            # list.index signals "not found" with ValueError; IndexError is
            # kept for any container that raises it instead.
            return None

    @cached_property
    def num(self):
        """
        Get the 1-based index of the entity in its parent's children list.

        Returns:
            int: The 1-based index of the entity, or None if not found.
        """
        return self.i + 1 if self.i is not None else None

    @cached_property
    def next(self):
        """
        Get the next sibling entity.

        Returns:
            Entity: The next sibling entity, or None if not found.
        """
        if self.i is None:
            return None
        try:
            return self.parent.children[self.i + 1]
        except IndexError:
            return None

    @cached_property
    def prev(self):
        """
        Get the previous sibling entity.

        Returns:
            Entity: The previous sibling entity, or None if not found.
        """
        i = self.i
        # Guard against index -1, which would wrap around to the last sibling.
        if i is None or i - 1 < 0:
            return None
        try:
            return self.parent.children[i - 1]
        except IndexError:
            return None

    @cached_property
    def is_text(self):
        """
        Check if the entity is a text entity.

        Returns:
            bool: True if the class name is exactly ``"Text"``.
        """
        return self.__class__.__name__ == "Text"

    @cached_property
    def is_stanza(self):
        """
        Check if the entity is a stanza entity.

        Returns:
            bool: True if the class name is exactly ``"Stanza"``.
        """
        return self.__class__.__name__ == "Stanza"

    @cached_property
    def is_line(self):
        """
        Check if the entity is a line entity.

        Returns:
            bool: True if the class name is exactly ``"Line"``.
        """
        return self.__class__.__name__ == "Line"

    @cached_property
    def is_wordtoken(self):
        """
        Check if the entity is a word token entity.

        Returns:
            bool: True if the class name is exactly ``"WordToken"``.
        """
        return self.__class__.__name__ == "WordToken"

    @cached_property
    def is_wordtype(self):
        """
        Check if the entity is a word type entity.

        Returns:
            bool: True if the class name is exactly ``"WordType"``.
        """
        return self.__class__.__name__ == "WordType"

    @cached_property
    def is_wordform(self):
        """
        Check if the entity is a word form entity.

        Returns:
            bool: True if the class name is exactly ``"WordForm"``.
        """
        return self.__class__.__name__ == "WordForm"

    @cached_property
    def is_syll(self):
        """
        Check if the entity is a syllable entity.

        Returns:
            bool: True if the class name is exactly ``"Syllable"``.
        """
        return self.__class__.__name__ == "Syllable"

    @cached_property
    def is_phon(self):
        """
        Check if the entity is a phoneme entity.

        Returns:
            bool: True if the class name is exactly ``"PhonemeClass"``.
        """
        return self.__class__.__name__ == "PhonemeClass"

    def children_from_cache(self):
        """
        Get the children of the entity from the cache.

        Returns:
            list: The cached entity's children, or None if caching is
            disabled or nothing is cached.
        """
        if not caching_is_enabled():
            return None
        res = self.from_cache()
        logger.debug(f"FOUND {res}")
        return None if res is None else res.children

    def get_key(self, key):
        """
        Get a key for caching purposes.

        Args:
            key: The key object.

        Returns:
            str: ``key.to_hash()`` if available, the hash of a truthy key,
            otherwise the key unchanged.
        """
        if hasattr(key, "to_hash"):
            return key.to_hash()
        if key:
            return hashstr(key)
        return key

    def from_cache(self, obj=None, key=None, as_dict=False):
        """
        Get an object from the cache.

        Args:
            obj: The object whose key is looked up (defaults to ``self``).
            key: An explicit cache key; overrides ``obj`` when given.
            as_dict (bool): If True, return the cached data as stored
                instead of passing it through ``from_json``.

        Returns:
            Any: The cached object, or None if not found or caching is off.
        """
        if obj is None:
            obj = self
        key = key if key else self.get_key(obj)
        if not key or self.use_cache == False:  # noqa: E712 - keep `==` semantics
            return None
        cache = self.get_cache()
        if key in cache:
            dat = cache[key]
            if dat:
                return dat if as_dict else from_json(dat)
        return None

    def get_cache(self):
        """
        Get the cache object.

        Returns:
            SimpleCache: A newly constructed cache object.
        """
        return SimpleCache()

    def cache(self, key_obj=None, val_obj=None, key=None, force=False):
        """
        Cache an object.

        Args:
            key_obj: The object to use as the cache key (defaults to ``self``).
            val_obj: The object to cache (defaults to ``key_obj``).
            key: An optional explicit key for the cache.
            force (bool): If True, overwrite an existing cache entry.
        """
        if key_obj is None:
            key_obj = self
        if val_obj is None:
            val_obj = key_obj
        logger.trace(f"key_obj = {key_obj}")
        logger.trace(f"val_obj = {val_obj}")
        key = key if key else self.get_key(key_obj)
        cache = self.get_cache()
        if key and (force or key not in cache):
            with logmap(f"saving object under key {key[:8]}"):
                with logmap("exporting to json", level="trace"):
                    data = val_obj.to_json()
                with logmap("uploading json to cache", level="trace"):
                    cache[key] = data


class EntityList(Entity):
    """
    A list of Entity objects.
    """

    def __init__(self, children=None, parent=None, **kwargs):
        """
        Initialize an EntityList object.

        Args:
            children (iterable): Child entities; ``None`` means no children.
                Unlike ``Entity``, items are stored as given, without type
                checks or re-parenting.
            parent (Entity): The parent entity.
            **kwargs: Additional attributes to set on the entity.
        """
        self.parent = parent
        self.children = list(children) if children is not None else []
        self._attrs = kwargs
        self._txt = None
        for k, v in self._attrs.items():
            setattr(self, k, v)

    @cached_property
    def txt(self):
        """
        Get the text content of the entity list.

        Returns:
            None: Always returns None for EntityList objects.
        """
        return None
6class Entity(UserList): 7 """ 8 Root Entity class representing a hierarchical structure in prosodic analysis. 9 10 This class serves as the base for various prosodic entities such as texts, stanzas, 11 lines, words, syllables, and phonemes. It provides common functionality for 12 managing hierarchical relationships, attributes, and data representation. 13 14 Attributes: 15 child_type (str): The type of child entities this entity can contain. 16 is_parseable (bool): Whether this entity can be parsed. 17 index_name (str): The name used for indexing this entity type. 18 prefix (str): A prefix used for attribute naming. 19 list_type (type): The type of list used for storing children. 20 cached_properties_to_clear (list): Properties to clear from cache. 21 use_cache (bool): Whether to use caching for this entity. 22 sep (str): Separator used when joining child texts. 23 """ 24 25 child_type = "Text" 26 is_parseable = False 27 index_name = None 28 prefix = "ent" 29 list_type = None 30 cached_properties_to_clear = [] 31 use_cache = False 32 sep = "" 33 34 def __init__(self, txt: str = "", children=[], parent=None, **kwargs): 35 """ 36 Initialize an Entity object. 37 38 Args: 39 txt (str): The text content of the entity. 40 children (list): List of child entities. 41 parent (Entity): The parent entity. 42 **kwargs: Additional attributes to set on the entity. 
43 """ 44 self.parent = parent 45 newchildren = [] 46 for child in children: 47 if not isinstance(child, Entity): 48 logger.warning(f"{child} is not an Entity") 49 continue 50 newchildren.append(child) 51 # if not child.is_wordtype: # don't do this for wordtypes since each wordtype is a single/shared python object 52 child.parent = self 53 children = newchildren 54 if self.list_type is None: 55 self.list_type = 'EntityList' 56 from .imports import GLOBALS 57 self.children = GLOBALS[self.list_type](children) 58 self._attrs = kwargs 59 self._txt = txt 60 self._mtr = None 61 for k, v in self._attrs.items(): 62 setattr(self, k, v) 63 64 def __iter__(self): 65 """ 66 Iterate over the children of this entity. 67 68 Yields: 69 Entity: The next child entity. 70 """ 71 yield from self.children 72 73 def to_hash(self): 74 """ 75 Generate a hash representation of the entity. 76 77 Returns: 78 str: A hash string representing the entity's content and attributes. 79 """ 80 return hashstr( 81 self.txt, tuple(sorted(self._attrs.items())), self.__class__.__name__ 82 ) 83 84 @cached_property 85 def html(self): 86 """ 87 Get the HTML representation of the entity. 88 89 Returns: 90 str: HTML representation of the entity, if available. 91 """ 92 if hasattr(self, "to_html"): 93 return self.to_html() 94 95 @cached_property 96 def key(self): 97 """ 98 Generate a unique key for the entity. 99 100 Returns: 101 str: A string key representing the entity's class and attributes. 102 """ 103 attrs = { 104 **{k: v for k, v in self.attrs.items() if v is not None}, 105 "txt": self._txt, 106 } 107 return f"{self.__class__.__name__}({get_attr_str(attrs)})" 108 109 @cached_property 110 def hash(self): 111 """ 112 Get a hash value for the entity. 113 114 Returns: 115 str: A hash string for the entity. 116 """ 117 return hashstr(self.key) 118 119 def __hash__(self): 120 """ 121 Get the hash value for use in hash-based collections. 122 123 Returns: 124 int: The hash value of the entity. 
125 """ 126 return hash(self.hash) 127 128 def __eq__(self, other): 129 """ 130 Check if this entity is equal to another. 131 132 Args: 133 other: The other object to compare with. 134 135 Returns: 136 bool: True if the objects are the same instance, False otherwise. 137 """ 138 return self is other 139 140 def __bool__(self): 141 """ 142 Check if the entity is considered True in a boolean context. 143 144 Returns: 145 bool: Always returns True for Entity objects. 146 """ 147 return True 148 149 def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs): 150 """ 151 Convert the entity to a JSON representation. 152 153 Args: 154 fn (str, optional): Filename to save the JSON output. 155 no_txt (bool): If True, exclude the text content. 156 yes_txt (bool): If True, include the full text content. 157 **kwargs: Additional key-value pairs to include in the JSON. 158 159 Returns: 160 dict: A dictionary representation of the entity. 161 """ 162 txt = (self._txt if not yes_txt else self.txt) if not no_txt else None 163 return to_json( 164 { 165 "_class": self.__class__.__name__, 166 **({"txt": txt} if txt is not None and (yes_txt or txt) else {}), 167 "children": [kid.to_json() for kid in self.children], 168 **kwargs, 169 }, 170 fn=fn, 171 ) 172 173 def save(self, fn, **kwargs): 174 """ 175 Save the entity to a file in JSON format. 176 177 Args: 178 fn (str): The filename to save to. 179 **kwargs: Additional arguments to pass to to_json. 180 181 Returns: 182 The result of to_json with the given filename. 183 """ 184 return self.to_json(fn=fn, **kwargs) 185 186 def render(self, as_str=False): 187 """ 188 Render the entity as HTML. 189 190 Args: 191 as_str (bool): If True, return the result as a string. 192 193 Returns: 194 str or HTML: The rendered HTML representation of the entity. 195 """ 196 return self.to_html(as_str=as_str) 197 198 @staticmethod 199 def from_json(json_d): 200 """ 201 Create an Entity object from a JSON dictionary. 
202 203 Args: 204 json_d (dict): A dictionary containing the entity data. 205 206 Returns: 207 Entity: An instance of the appropriate Entity subclass. 208 """ 209 from .imports import GLOBALS, CHILDCLASSES 210 211 classname = json_d["_class"] 212 classx = GLOBALS[classname] 213 childx = CHILDCLASSES.get(classname) 214 children = json_d.get("children", []) 215 inpd = {k: v for k, v in json_d.items() if k not in {"children", "_class"}} 216 if children and childx: 217 children = [childx.from_json(d) for d in json_d["children"]] 218 return classx(children=tuple(children), **inpd) 219 220 @property 221 def attrs(self): 222 """ 223 Get the attributes of the entity. 224 225 Returns: 226 dict: A dictionary of the entity's attributes. 227 """ 228 odx = {"num": self.num} 229 if ( 230 self.__class__.__name__ 231 not in {"Text", "Stanza", "MeterLine", "MeterText", "Meter"} 232 and self.txt 233 ): 234 odx["txt"] = self.txt 235 return {**odx, **self._attrs} 236 237 @cached_property 238 def prefix_attrs(self, with_parent=True): 239 """ 240 Get the attributes of the entity with a prefix. 241 242 Args: 243 with_parent (bool): If True, include parent attributes. 244 245 Returns: 246 dict: A dictionary of the entity's attributes with a prefix. 247 """ 248 249 def getkey(k): 250 o = f"{self.prefix}_{k}" 251 o = DF_COLS_RENAME.get(o, o) 252 return o 253 254 odx = {getkey(k): v for k, v in self.attrs.items() if v is not None} 255 if with_parent and self.parent: 256 return {**self.parent.prefix_attrs, **odx} 257 return odx 258 259 @cached_property 260 def txt(self): 261 """ 262 Get the text content of the entity. 263 264 Returns: 265 str: The text content of the entity. 266 """ 267 if self._txt: 268 txt = self._txt 269 elif self.children: 270 txt = self.child_class.sep.join(child.txt for child in self.children) 271 else: 272 txt = "" 273 return clean_text(txt) 274 275 @cached_property 276 def data(self): 277 """ 278 Get the data associated with the entity. 
279 280 Returns: 281 list: The list of child entities. 282 """ 283 return self.children 284 285 @cached_property 286 def l(self): 287 """ 288 Get the list of child entities. 289 290 Returns: 291 list: The list of child entities. 292 """ 293 return self.children 294 295 def clear_cached_properties(self): 296 """ 297 Clear cached properties to free up memory. 298 """ 299 for prop in self.cached_properties_to_clear: 300 if prop in self.__dict__: 301 del self.__dict__[prop] 302 # elif hasattr(self,prop): 303 # try: 304 # func = getattr(self,prop) 305 # func.clear_cache() 306 # except AttributeError: 307 # pass 308 309 def inspect(self, indent=0, maxlines=None, incl_phons=False): 310 """ 311 Inspect the entity and its children. 312 313 Args: 314 indent (int): The indentation level for the output. 315 maxlines (int): The maximum number of lines to display. 316 incl_phons (bool): If True, include phoneme information. 317 """ 318 attrstr = get_attr_str(self.attrs) 319 myself = f"{self.__class__.__name__}({attrstr})" 320 if indent: 321 myself = textwrap.indent(myself, "|" + (" " * (indent - 1))) 322 lines = [myself] 323 for child in self.children: 324 if isinstance(child, Entity) and ( 325 incl_phons or not child.__class__.__name__.startswith("Phoneme") 326 ): 327 lines.append( 328 child.inspect(indent=indent + 4, incl_phons=incl_phons).replace( 329 "PhonemeClass", "Phoneme" 330 ) 331 ) 332 # self.__class__.__name__ in {'Text', 'Stanza', 'Line'} 333 dblbreakfor = False 334 breakstr = "\n|\n" if dblbreakfor else "\n" 335 o = breakstr.join(lines) 336 if not indent: 337 if maxlines: 338 o = "\n".join(o.split("\n")[:maxlines]) 339 print(o) 340 else: 341 return o 342 343 def _repr_html_(self, df=None): 344 """ 345 Get the HTML representation of the entity. 346 347 Args: 348 df (DataFrame): An optional DataFrame to use for rendering. 349 350 Returns: 351 str: The HTML representation of the entity. 
352 """ 353 354 def blank(x): 355 if x in {None, np.nan}: 356 return "" 357 return x 358 359 return (self.df if df is None else df).applymap(blank)._repr_html_() 360 361 def __repr__(self, attrs=None, bad_keys=None): 362 """ 363 Get a string representation of the entity. 364 365 Args: 366 attrs (dict): An optional dictionary of attributes to use. 367 bad_keys (list): An optional list of keys to exclude. 368 369 Returns: 370 str: A string representation of the entity. 371 """ 372 d = { 373 k: v 374 for k, v in ( 375 attrs 376 if attrs is not None 377 else (self.attrs if self.attrs is not None else self._attrs) 378 ).items() 379 } 380 return f"{self.__class__.__name__}({get_attr_str(d, bad_keys=bad_keys)})" 381 382 @cached_property 383 def ld(self): 384 """ 385 Get a list of dictionaries representing the entity and its children. 386 387 Returns: 388 list: A list of dictionaries representing the entity and its children. 389 """ 390 return self.get_ld() 391 392 @cached_property 393 def child_class(self): 394 """ 395 Get the class of the child entities. 396 397 Returns: 398 type: The class of the child entities. 399 """ 400 from .imports import GLOBALS 401 402 return GLOBALS.get(self.child_type) 403 404 def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True): 405 """ 406 Get a list of dictionaries representing the entity and its children. 407 408 Args: 409 incl_phons (bool): If True, include phoneme information. 410 incl_sylls (bool): If True, include syllable information. 411 multiple_wordforms (bool): If True, include multiple word forms. 412 413 Returns: 414 list: A list of dictionaries representing the entity and its children. 
415 """ 416 if not incl_sylls and self.child_type == "Syllable": 417 return [{**self.prefix_attrs}] 418 if not incl_phons and self.child_type == "Phoneme": 419 return [{**self.prefix_attrs}] 420 good_children = [c for c in self.children if isinstance(c, Entity)] 421 # logger.debug(f'good children of {type(self)} -> {good_children}') 422 if not multiple_wordforms and self.child_type == "WordForm" and good_children: 423 good_children = good_children[:1] 424 # logger.debug(f'good children now {good_children}') 425 if good_children: 426 return [ 427 {**self.prefix_attrs, **child.prefix_attrs, **grandchild_d} 428 for child in good_children 429 for grandchild_d in child.get_ld( 430 incl_phons=incl_phons, 431 incl_sylls=incl_sylls, 432 multiple_wordforms=multiple_wordforms, 433 ) 434 ] 435 else: 436 return [{**self.prefix_attrs}] 437 438 def get_df(self, **kwargs): 439 """ 440 Get a DataFrame representation of the entity and its children. 441 442 Args: 443 **kwargs: Additional arguments to pass to get_ld. 444 445 Returns: 446 DataFrame: A DataFrame representation of the entity and its children. 447 """ 448 odf = pd.DataFrame(self.get_ld(**kwargs)) 449 for c in DF_BADCOLS: 450 if c in set(odf.columns): 451 odf = odf.drop(c, axis=1) 452 for c in odf: 453 if c.endswith("_num"): 454 odf[c] = odf[c].fillna(0).apply(int) 455 else: 456 odf[c] = odf[c].fillna("") 457 odf = setindex(odf, DF_INDEX) 458 459 def unbool(x): 460 if x is True: 461 return 1 462 if x is False: 463 return 0 464 if x is None: 465 return 0 466 return x 467 468 odf = odf.applymap(unbool) 469 return odf 470 471 @cached_property 472 def df(self): 473 """ 474 Get a DataFrame representation of the entity and its children. 475 476 Returns: 477 DataFrame: A DataFrame representation of the entity and its children. 478 """ 479 return self.get_df() 480 481 def __getattr__(self, attr): 482 """ 483 Get an attribute of the entity by name. 484 485 Args: 486 attr (str): The name of the attribute. 
487 488 Returns: 489 Any: The value of the attribute. 490 """ 491 objs = { 492 "stanza": "stanzas", 493 "line": "lines", 494 "word": "wordtokens", 495 "wordtoken": "wordtokens", 496 "wordtype": "wordtypes", 497 "wordform": "wordforms", 498 "syllable": "syllables", 499 "phoneme": "phonemes", 500 } 501 if attr[-1].isdigit(): 502 for pref, lname in objs.items(): 503 if attr.startswith(pref) and attr[len(pref) :].isdigit(): 504 num = int(attr[len(pref) :]) 505 try: 506 return getattr(self, lname)[num - 1] 507 except IndexError: 508 logger.warning(f"no {pref} at that number") 509 return 510 511 def get_parent(self, parent_type=None): 512 """ 513 Get the parent entity of a specific type. 514 515 Args: 516 parent_type (str): The type of parent entity to find. 517 518 Returns: 519 Entity: The parent entity of the specified type, or None if not found. 520 """ 521 logger.trace(self.__class__.__name__) 522 if not hasattr(self, "parent") or not self.parent: 523 return 524 if self.parent.__class__.__name__ == parent_type: 525 return self.parent 526 return self.parent.get_parent(parent_type) 527 528 @cached_property 529 def stanzas(self): 530 """ 531 Get the list of stanza entities. 532 533 Returns: 534 StanzaList: A list of stanza entities. 535 """ 536 from .texts import StanzaList 537 538 if self.is_text: 539 o = self.children 540 elif self.is_stanza: 541 o = [self] 542 else: 543 o = [] 544 return StanzaList(o) 545 546 @property 547 def line_r(self): 548 """ 549 Get a random line entity. 550 551 Returns: 552 Line: A random line entity, or None if no lines exist. 553 """ 554 return random.choice(self.lines) if self.lines else None 555 556 @property 557 def word_r(self): 558 """ 559 Get a random word entity. 560 561 Returns: 562 WordToken: A random word entity, or None if no words exist. 563 """ 564 return random.choice(self.words) if self.words else None 565 566 @cached_property 567 def lines(self): 568 """ 569 Get the list of line entities. 
570 571 Returns: 572 LineList: A list of line entities. 573 """ 574 from .texts import LineList 575 576 if self.is_stanza: 577 o = self.children 578 elif self.is_line: 579 o = [self] 580 else: 581 o = [line for stanza in self.stanzas for line in stanza.children] 582 return LineList(o) 583 584 @cached_property 585 def wordtokens(self): 586 """ 587 Get the list of word token entities. 588 589 Returns: 590 WordTokenList: A list of word token entities. 591 """ 592 from .words import WordTokenList 593 594 if self.is_line: 595 o = self.children 596 elif self.is_wordtoken: 597 o = [self] 598 else: 599 o = [wt for line in self.lines for wt in line.children] 600 return WordTokenList(o) 601 602 @property 603 def words(self): 604 """ 605 Get the list of word token entities. 606 607 Returns: 608 WordTokenList: A list of word token entities. 609 """ 610 return self.wordtokens 611 612 @cached_property 613 def wordtypes(self): 614 """ 615 Get the list of word type entities. 616 617 Returns: 618 WordTypeList: A list of word type entities. 619 """ 620 from .words import WordTypeList 621 622 if self.is_wordtoken: 623 o = self.children 624 elif self.is_wordtype: 625 o = [self] 626 else: 627 o = [wtype for token in self.wordtokens for wtype in token.children] 628 return WordTypeList(o) 629 630 @cached_property 631 def wordforms(self): 632 """ 633 Get the list of word form entities. 634 635 Returns: 636 WordFormList: A list of word form entities. 637 """ 638 from .words import WordFormList 639 640 if self.is_wordtype: 641 o = self.children[:1] 642 elif self.is_wordtype: 643 o = [self] 644 else: 645 o = [wtype.children[0] for wtype in self.wordtypes if wtype.children] 646 return WordFormList(o) 647 648 @cached_property 649 def wordforms_nopunc(self): 650 """ 651 Get the list of word form entities, excluding punctuation. 652 653 Returns: 654 list: A list of word form entities, excluding punctuation. 
655 """ 656 return [wf for wf in self.wordforms if not wf.parent.is_punc] 657 658 @cached_property 659 def wordforms_all(self): 660 """ 661 Get the list of all word form entities. 662 663 Returns: 664 list: A list of all word form entities. 665 """ 666 if self.is_wordtype: 667 o = self.children 668 if self.is_wordform: 669 o = [self] 670 else: 671 o = [wtype.children for wtype in self.wordtypes] 672 return o 673 674 @cached_property 675 def syllables(self): 676 """ 677 Get the list of syllable entities. 678 679 Returns: 680 SyllableList: A list of syllable entities. 681 """ 682 from .words import SyllableList 683 684 if self.is_wordform: 685 o = self.children 686 if self.is_syll: 687 o = [self] 688 else: 689 o = [syll for wf in self.wordforms for syll in wf.children] 690 return SyllableList(o) 691 692 @cached_property 693 def phonemes(self): 694 """ 695 Get the list of phoneme entities. 696 697 Returns: 698 PhonemeList: A list of phoneme entities. 699 """ 700 from .words import PhonemeList 701 702 if self.is_syll: 703 o = self.children 704 if self.is_phon: 705 o = [self] 706 else: 707 o = [phon for syll in self.syllables for phon in syll.children] 708 return PhonemeList(o) 709 710 @cached_property 711 def text(self): 712 """ 713 Get the parent text entity. 714 715 Returns: 716 Text: The parent text entity, or None if not found. 717 """ 718 return self.get_parent("Text") 719 720 @cached_property 721 def stanza(self): 722 """ 723 Get the parent stanza entity. 724 725 Returns: 726 Stanza: The parent stanza entity, or None if not found. 727 """ 728 return self.get_parent("Stanza") 729 730 @cached_property 731 def line(self): 732 """ 733 Get the parent line entity. 734 735 Returns: 736 Line: The parent line entity, or None if not found. 737 """ 738 return self.get_parent("Line") 739 740 @cached_property 741 def wordtoken(self): 742 """ 743 Get the parent word token entity. 744 745 Returns: 746 WordToken: The parent word token entity, or None if not found. 
747 """ 748 return self.get_parent("WordToken") 749 750 @cached_property 751 def wordtype(self): 752 """ 753 Get the parent word type entity. 754 755 Returns: 756 WordType: The parent word type entity, or None if not found. 757 """ 758 return self.get_parent("WordType") 759 760 @cached_property 761 def wordform(self): 762 """ 763 Get the parent word form entity. 764 765 Returns: 766 WordForm: The parent word form entity, or None if not found. 767 """ 768 return self.get_parent("WordForm") 769 770 @cached_property 771 def syllable(self): 772 """ 773 Get the parent syllable entity. 774 775 Returns: 776 Syllable: The parent syllable entity, or None if not found. 777 """ 778 return self.get_parent("Syllable") 779 780 @cached_property 781 def i(self): 782 """ 783 Get the index of the entity in its parent's children list. 784 785 Returns: 786 int: The index of the entity, or None if not found. 787 """ 788 if self.parent is None: 789 return None 790 if not self.parent.children: 791 return None 792 try: 793 return self.parent.children.index(self) 794 except IndexError: 795 return None 796 797 @cached_property 798 def num(self): 799 """ 800 Get the 1-based index of the entity in its parent's children list. 801 802 Returns: 803 int: The 1-based index of the entity, or None if not found. 804 """ 805 return self.i + 1 if self.i is not None else None 806 807 @cached_property 808 def next(self): 809 """ 810 Get the next sibling entity. 811 812 Returns: 813 Entity: The next sibling entity, or None if not found. 814 """ 815 if self.i is None: 816 return None 817 try: 818 return self.parent.children[self.i + 1] 819 except IndexError: 820 return None 821 822 @cached_property 823 def prev(self): 824 """ 825 Get the previous sibling entity. 826 827 Returns: 828 Entity: The previous sibling entity, or None if not found. 
829 """ 830 if self.i is None: 831 return None 832 i = self.i 833 if i - 1 < 0: 834 return None 835 try: 836 return self.parent.children[i - 1] 837 except IndexError: 838 return None 839 840 @cached_property 841 def is_text(self): 842 """ 843 Check if the entity is a text entity. 844 845 Returns: 846 bool: True if the entity is a text entity, False otherwise. 847 """ 848 return self.__class__.__name__ == "Text" 849 850 @cached_property 851 def is_stanza(self): 852 """ 853 Check if the entity is a stanza entity. 854 855 Returns: 856 bool: True if the entity is a stanza entity, False otherwise. 857 """ 858 return self.__class__.__name__ == "Stanza" 859 860 @cached_property 861 def is_line(self): 862 """ 863 Check if the entity is a line entity. 864 865 Returns: 866 bool: True if the entity is a line entity, False otherwise. 867 """ 868 return self.__class__.__name__ == "Line" 869 870 @cached_property 871 def is_wordtoken(self): 872 """ 873 Check if the entity is a word token entity. 874 875 Returns: 876 bool: True if the entity is a word token entity, False otherwise. 877 """ 878 return self.__class__.__name__ == "WordToken" 879 880 @cached_property 881 def is_wordtype(self): 882 """ 883 Check if the entity is a word type entity. 884 885 Returns: 886 bool: True if the entity is a word type entity, False otherwise. 887 """ 888 return self.__class__.__name__ == "WordType" 889 890 @cached_property 891 def is_wordform(self): 892 """ 893 Check if the entity is a word form entity. 894 895 Returns: 896 bool: True if the entity is a word form entity, False otherwise. 897 """ 898 return self.__class__.__name__ == "WordForm" 899 900 @cached_property 901 def is_syll(self): 902 """ 903 Check if the entity is a syllable entity. 904 905 Returns: 906 bool: True if the entity is a syllable entity, False otherwise. 907 """ 908 return self.__class__.__name__ == "Syllable" 909 910 @cached_property 911 def is_phon(self): 912 """ 913 Check if the entity is a phoneme entity. 
914 915 Returns: 916 bool: True if the entity is a phoneme entity, False otherwise. 917 """ 918 return self.__class__.__name__ == "PhonemeClass" 919 920 def children_from_cache(self): 921 """ 922 Get the children of the entity from the cache. 923 924 Returns: 925 list: The list of child entities, or None if not found in the cache. 926 """ 927 if caching_is_enabled(): 928 res = self.from_cache() 929 print("FOUND", res) 930 return None if res is None else res.children 931 932 def get_key(self, key): 933 """ 934 Get a key for caching purposes. 935 936 Args: 937 key: The key object. 938 939 Returns: 940 str: The hashed key. 941 """ 942 if hasattr(key, "to_hash"): 943 key = key.to_hash() 944 elif key: 945 key = hashstr(key) 946 return key 947 948 def from_cache(self, obj=None, key=None, as_dict=False): 949 """ 950 Get an object from the cache. 951 952 Args: 953 obj: The object to cache. 954 key: The key for the cache. 955 as_dict (bool): If True, return the cached data as a dictionary. 956 957 Returns: 958 Any: The cached object, or None if not found. 959 """ 960 if obj is None: 961 obj = self 962 key = self.get_key(obj) if not key else key 963 if key and self.use_cache != False: 964 cache = self.get_cache() 965 if key in cache: 966 dat = cache[key] 967 if dat: 968 return from_json(dat) if not as_dict else dat 969 970 def get_cache(self): 971 """ 972 Get the cache object. 973 974 Returns: 975 SimpleCache: The cache object. 976 """ 977 return SimpleCache() 978 979 def cache( 980 self, key_obj=None, val_obj=None, key=None, force=False 981 ): 982 """ 983 Cache an object. 984 985 Args: 986 key_obj: The object to use as the cache key. 987 val_obj: The object to cache. 988 key: An optional key for the cache. 989 force (bool): If True, force the cache to be updated. 
990 """ 991 if key_obj is None: 992 key_obj = self 993 if val_obj is None: 994 val_obj = key_obj 995 logger.trace(f"key_obj = {key_obj}") 996 logger.trace(f"val_obj = {val_obj}") 997 key = self.get_key(key_obj) if not key else key 998 cache = self.get_cache() 999 if key and (force or not key in cache): 1000 with logmap(f"saving object under key {key[:8]}"): 1001 with logmap("exporting to json", level="trace"): 1002 data = val_obj.to_json() 1003 with logmap("uploading json to cache", level="trace"): 1004 cache[key] = data
Root Entity class representing a hierarchical structure in prosodic analysis.
This class serves as the base for various prosodic entities such as texts, stanzas, lines, words, syllables, and phonemes. It provides common functionality for managing hierarchical relationships, attributes, and data representation.
Attributes:
- child_type (str): The type of child entities this entity can contain.
- is_parseable (bool): Whether this entity can be parsed.
- index_name (str): The name used for indexing this entity type.
- prefix (str): A prefix used for attribute naming.
- list_type (str): Name of the list class used for storing children; it is looked up in GLOBALS and falls back to 'EntityList' when left as None.
- cached_properties_to_clear (list): Properties to clear from cache.
- use_cache (bool): Whether to use caching for this entity.
- sep (str): Separator used when joining child texts.
def __init__(self, txt: str = "", children=[], parent=None, **kwargs):
    """
    Build an entity, adopt its children and store its attributes.

    Args:
        txt (str): Raw text for this entity; stored on ``_txt``.
        children (list): Candidate child entities. Items that are not
            ``Entity`` instances are logged with a warning and dropped.
            The default list is only iterated, never modified.
        parent (Entity): The entity that contains this one, if any.
        **kwargs: Extra attributes; kept in ``_attrs`` and also set
            directly on the instance.
    """
    self.parent = parent
    adopted = []
    for kid in children:
        if isinstance(kid, Entity):
            adopted.append(kid)
            # Every accepted child is re-parented to this entity.
            kid.parent = self
        else:
            logger.warning(f"{kid} is not an Entity")
    if self.list_type is None:
        # No container configured: fall back to the generic list class.
        self.list_type = 'EntityList'
    from .imports import GLOBALS
    list_class = GLOBALS[self.list_type]
    self.children = list_class(adopted)
    self._attrs = kwargs
    self._txt = txt
    self._mtr = None
    for name, value in kwargs.items():
        setattr(self, name, value)
Initialize an Entity object.
Arguments:
- txt (str): The text content of the entity.
- children (list): List of child entities.
- parent (Entity): The parent entity.
- **kwargs: Additional attributes to set on the entity.
def to_hash(self):
    """
    Compute a hash string identifying this entity.

    The hash is derived from the entity's text, its extra attributes (as a
    sorted tuple of key/value pairs) and its class name.

    Returns:
        The value ``hashstr`` produces for those three components.
    """
    text = self.txt
    sorted_attrs = tuple(sorted(self._attrs.items()))
    class_name = self.__class__.__name__
    return hashstr(text, sorted_attrs, class_name)
Generate a hash representation of the entity.
Returns:
str: A hash string representing the entity's content and attributes.
@cached_property
def html(self):
    """
    Get the HTML representation of the entity.

    Returns:
        Whatever ``to_html()`` returns, or None when the entity has no
        callable ``to_html``.
    """
    # ``hasattr`` cannot be used here: Entity.__getattr__ returns None for
    # unknown names instead of raising AttributeError, so ``hasattr`` is
    # always true and a missing ``to_html`` would end up calling None.
    to_html = getattr(self, "to_html", None)
    if callable(to_html):
        return to_html()
    return None
Get the HTML representation of the entity.
Returns:
str: HTML representation of the entity, if available.
@cached_property
def key(self):
    """
    Build a readable key string for the entity.

    The key combines the class name with every non-None entry of ``attrs``
    plus the raw ``_txt`` value (which replaces any ``txt`` entry).

    Returns:
        str: ``ClassName(<attribute string>)``.
    """
    fields = {}
    for name, value in self.attrs.items():
        if value is not None:
            fields[name] = value
    # The raw text always wins over a derived "txt" attribute.
    fields["txt"] = self._txt
    return f"{self.__class__.__name__}({get_attr_str(fields)})"
Generate a unique key for the entity.
Returns:
str: A string key representing the entity's class and attributes.
@cached_property
def hash(self):
    """
    Hash the entity's key.

    Returns:
        The value ``hashstr`` produces for ``self.key``.
    """
    digest = hashstr(self.key)
    return digest
Get a hash value for the entity.
Returns:
str: A hash string for the entity.
def to_json(self, fn=None, no_txt=False, yes_txt=False, **kwargs):
    """
    Serialise the entity and, recursively, its children.

    Args:
        fn (str, optional): Filename forwarded to the module-level
            ``to_json`` helper.
        no_txt (bool): If True, leave the text out entirely.
        yes_txt (bool): If True, use the computed ``txt`` (and emit it even
            when empty) instead of the raw ``_txt``.
        **kwargs: Extra key/value pairs merged into the payload; they
            override the generated keys of the same name.

    Returns:
        The result of the module-level ``to_json`` helper for the payload.
    """
    if no_txt:
        text = None
    elif yes_txt:
        text = self.txt
    else:
        text = self._txt

    payload = {"_class": self.__class__.__name__}
    # Raw text is only written when non-empty; yes_txt forces it through.
    if text is not None and (yes_txt or text):
        payload["txt"] = text
    payload["children"] = [kid.to_json() for kid in self.children]
    payload.update(kwargs)
    return to_json(payload, fn=fn)
Convert the entity to a JSON representation.
Arguments:
- fn (str, optional): Filename to save the JSON output.
- no_txt (bool): If True, exclude the text content.
- yes_txt (bool): If True, include the full text content.
- **kwargs: Additional key-value pairs to include in the JSON.
Returns:
dict: A dictionary representation of the entity.
def save(self, fn, **kwargs):
    """
    Write the entity out by delegating to ``to_json``.

    Args:
        fn (str): The filename to save to.
        **kwargs: Additional keyword arguments forwarded to ``to_json``.

    Returns:
        Whatever ``to_json`` returns for the given filename.
    """
    options = {"fn": fn, **kwargs}
    return self.to_json(**options)
Save the entity to a file in JSON format.
Arguments:
- fn (str): The filename to save to.
- **kwargs: Additional arguments to pass to to_json.
Returns:
The result of to_json with the given filename.
def render(self, as_str=False):
    """
    Render the entity by delegating to ``to_html``.

    Args:
        as_str (bool): Forwarded to ``to_html`` as its ``as_str`` keyword.

    Returns:
        Whatever ``to_html`` returns.
    """
    renderer = self.to_html
    return renderer(as_str=as_str)
Render the entity as HTML.
Arguments:
- as_str (bool): If True, return the result as a string.
Returns:
str or HTML: The rendered HTML representation of the entity.
@staticmethod
def from_json(json_d):
    """
    Rebuild an entity from its JSON dictionary.

    Args:
        json_d (dict): Entity data; must contain ``_class`` and may contain
            ``children``. All other keys become constructor keywords.

    Returns:
        Entity: An instance of the class registered under ``_class``.
    """
    from .imports import GLOBALS, CHILDCLASSES

    class_name = json_d["_class"]
    entity_class = GLOBALS[class_name]
    kid_class = CHILDCLASSES.get(class_name)
    kids = json_d.get("children", [])

    init_kwargs = {}
    for field, value in json_d.items():
        if field != "children" and field != "_class":
            init_kwargs[field] = value

    # Children are only rebuilt when a child class is registered for this type.
    if kids and kid_class:
        kids = [kid_class.from_json(kid_d) for kid_d in json_d["children"]]
    return entity_class(children=tuple(kids), **init_kwargs)
Create an Entity object from a JSON dictionary.
Arguments:
- json_d (dict): A dictionary containing the entity data.
Returns:
Entity: An instance of the appropriate Entity subclass.
@property
def attrs(self):
    """
    Collect the entity's attributes.

    The result starts with ``num``, adds ``txt`` when the text is non-empty
    and the class is not one of the container/meter classes listed below,
    and finally overlays the extra attributes from ``_attrs``.

    Returns:
        dict: The combined attribute dictionary.
    """
    combined = {"num": self.num}
    textless = {"Text", "Stanza", "MeterLine", "MeterText", "Meter"}
    if self.__class__.__name__ not in textless:
        text = self.txt
        if text:
            combined["txt"] = text
    combined.update(self._attrs)
    return combined
Get the attributes of the entity.
Returns:
dict: A dictionary of the entity's attributes.
@cached_property
def prefix_attrs(self, with_parent=True):
    """
    Get the entity's non-None attributes under prefixed column names.

    Each key becomes ``<prefix>_<key>`` and is then passed through
    ``DF_COLS_RENAME``. As this is a cached property, attribute access
    always runs with the default ``with_parent=True``.

    Args:
        with_parent (bool): If True and a parent exists, the parent's
            prefixed attributes are included underneath this entity's own.

    Returns:
        dict: Prefixed attribute names mapped to their values.
    """
    own = {}
    for name, value in self.attrs.items():
        if value is None:
            continue
        column = f"{self.prefix}_{name}"
        own[DF_COLS_RENAME.get(column, column)] = value

    if not (with_parent and self.parent):
        return own
    # Own values take precedence over anything inherited from the parent.
    return {**self.parent.prefix_attrs, **own}
Get the attributes of the entity with a prefix.
Arguments:
- with_parent (bool): If True, include the parent's prefixed attributes. Because this is a cached property, attribute access always uses the default (True).
Returns:
dict: A dictionary of the entity's attributes with a prefix.
@cached_property
def txt(self):
    """
    Get the cleaned text of the entity.

    Uses the raw ``_txt`` when it is non-empty; otherwise joins the
    children's texts with the child class's separator.

    Returns:
        The result of ``clean_text`` on that text ("" when there is neither
        raw text nor children).
    """
    raw = self._txt
    if not raw:
        if self.children:
            joiner = self.child_class.sep
            raw = joiner.join(kid.txt for kid in self.children)
        else:
            raw = ""
    return clean_text(raw)
Get the text content of the entity.
Returns:
str: The text content of the entity.
@cached_property
def data(self):
    """
    Expose the children under the name ``data``.

    Returns:
        The entity's ``children`` container.
    """
    kids = self.children
    return kids
Get the data associated with the entity.
Returns:
list: The list of child entities.
@cached_property
def l(self):
    """
    Shorthand accessor for the children.

    Returns:
        The entity's ``children`` container.
    """
    kids = self.children
    return kids
Get the list of child entities.
Returns:
list: The list of child entities.
def clear_cached_properties(self):
    """
    Drop cached values for the names in ``cached_properties_to_clear``.

    Only entries present in the instance ``__dict__`` are removed; names
    that were never cached are ignored.
    """
    cache = self.__dict__
    for name in self.cached_properties_to_clear:
        cache.pop(name, None)
Clear cached properties to free up memory.
def inspect(self, indent=0, maxlines=None, incl_phons=False):
    """
    Show the entity and its descendants as an indented tree.

    Called with ``indent=0`` (the default) the tree is printed and nothing
    is returned; nested calls with a non-zero indent return the text.

    Args:
        indent (int): Indentation width for this entity's own line.
        maxlines (int): At the top level, print at most this many lines.
        incl_phons (bool): If True, also show children whose class name
            starts with "Phoneme".

    Returns:
        str: The rendered subtree when ``indent`` is non-zero, else None.
    """
    attr_text = get_attr_str(self.attrs)
    header = f"{self.__class__.__name__}({attr_text})"
    if indent:
        header = textwrap.indent(header, "|" + (" " * (indent - 1)))

    rows = [header]
    for kid in self.children:
        if not isinstance(kid, Entity):
            continue
        if not incl_phons and kid.__class__.__name__.startswith("Phoneme"):
            continue
        subtree = kid.inspect(indent=indent + 4, incl_phons=incl_phons)
        rows.append(subtree.replace("PhonemeClass", "Phoneme"))

    report = "\n".join(rows)
    if indent:
        return report
    if maxlines:
        report = "\n".join(report.split("\n")[:maxlines])
    print(report)
Inspect the entity and its children.
Arguments:
- indent (int): The indentation level for the output.
- maxlines (int): The maximum number of lines to display.
- incl_phons (bool): If True, include phoneme information.
@cached_property
def ld(self):
    """
    Cached result of ``get_ld()`` with its default options.

    Returns:
        list: Whatever ``get_ld()`` returns.
    """
    rows = self.get_ld()
    return rows
Get a list of dictionaries representing the entity and its children.
Returns:
list: A list of dictionaries representing the entity and its children.
@cached_property
def child_class(self):
    """
    Look up the class registered for this entity's ``child_type``.

    Returns:
        The ``GLOBALS`` entry for ``child_type``, or None if there is none.
    """
    from .imports import GLOBALS

    registered = GLOBALS.get(self.child_type)
    return registered
Get the class of the child entities.
Returns:
type: The class of the child entities.
def get_ld(self, incl_phons=False, incl_sylls=True, multiple_wordforms=True):
    """
    Flatten the entity and its descendants into a list of row dicts.

    Each row merges this entity's prefixed attributes with those of one
    child and one of that child's own rows.

    Args:
        incl_phons (bool): If False, stop descending at entities whose
            children are phonemes.
        incl_sylls (bool): If False, stop descending at entities whose
            children are syllables.
        multiple_wordforms (bool): If False, only the first word form is
            followed for entities whose children are word forms.

    Returns:
        list: One dictionary per leaf reached; a single row holding only
        this entity's prefixed attributes when nothing is descended into.
    """
    kind = self.child_type
    stop_at_sylls = not incl_sylls and kind == "Syllable"
    stop_at_phons = not incl_phons and kind == "Phoneme"
    if stop_at_sylls or stop_at_phons:
        return [{**self.prefix_attrs}]

    kids = [kid for kid in self.children if isinstance(kid, Entity)]
    if kids and kind == "WordForm" and not multiple_wordforms:
        kids = kids[:1]
    if not kids:
        return [{**self.prefix_attrs}]

    rows = []
    for kid in kids:
        kid_rows = kid.get_ld(
            incl_phons=incl_phons,
            incl_sylls=incl_sylls,
            multiple_wordforms=multiple_wordforms,
        )
        for kid_row in kid_rows:
            rows.append({**self.prefix_attrs, **kid.prefix_attrs, **kid_row})
    return rows
Get a list of dictionaries representing the entity and its children.
Arguments:
- incl_phons (bool): If True, include phoneme information.
- incl_sylls (bool): If True, include syllable information.
- multiple_wordforms (bool): If True, include multiple word forms.
Returns:
list: A list of dictionaries representing the entity and its children.
def get_df(self, **kwargs):
    """
    Get a DataFrame representation of the entity and its children.

    The rows come from ``get_ld``. Columns listed in ``DF_BADCOLS`` are
    dropped, ``*_num`` columns are filled with 0 and cast to int, all other
    columns are filled with "", the frame is indexed through
    ``setindex(odf, DF_INDEX)`` and finally True/False/None cells are
    turned into 1/0/0.

    Args:
        **kwargs: Additional arguments to pass to get_ld.

    Returns:
        DataFrame: The cleaned, indexed frame.
    """
    odf = pd.DataFrame(self.get_ld(**kwargs))

    # Build the column set once instead of once per candidate column.
    present = set(odf.columns)
    unwanted = [c for c in dict.fromkeys(DF_BADCOLS) if c in present]
    if unwanted:
        odf = odf.drop(columns=unwanted)

    for c in odf:
        if c.endswith("_num"):
            odf[c] = odf[c].fillna(0).apply(int)
        else:
            odf[c] = odf[c].fillna("")
    odf = setindex(odf, DF_INDEX)

    def unbool(x):
        if x is True:
            return 1
        if x is False or x is None:
            return 0
        return x

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; fall back to applymap on older pandas versions.
    elementwise = odf.map if hasattr(odf, "map") else odf.applymap
    return elementwise(unbool)
Get a DataFrame representation of the entity and its children.
Arguments:
- **kwargs: Additional arguments to pass to get_ld.
Returns:
DataFrame: A DataFrame representation of the entity and its children.
@cached_property
def df(self):
    """
    Cached result of ``get_df()`` with its default options.

    Returns:
        DataFrame: Whatever ``get_df()`` returns.
    """
    frame = self.get_df()
    return frame
Get a DataFrame representation of the entity and its children.
Returns:
DataFrame: A DataFrame representation of the entity and its children.
def get_parent(self, parent_type=None):
    """
    Walk up the parent chain to the nearest ancestor of a given class.

    Args:
        parent_type (str): Class name of the ancestor to look for.

    Returns:
        Entity: The first ancestor whose class name equals ``parent_type``,
        or None when the chain ends without a match.
    """
    logger.trace(self.__class__.__name__)
    parent = self.parent if hasattr(self, "parent") else None
    if not parent:
        return
    if parent.__class__.__name__ == parent_type:
        return parent
    # Not a match: let the parent continue the search further up.
    return parent.get_parent(parent_type)
Get the parent entity of a specific type.
Arguments:
- parent_type (str): The type of parent entity to find.
Returns:
Entity: The parent entity of the specified type, or None if not found.
@cached_property
def stanzas(self):
    """
    Collect the stanza entities reachable from this entity.

    Returns:
        StanzaList: The children of a text, the entity itself when it is a
            stanza, and an empty list otherwise.
    """
    from .texts import StanzaList

    if self.is_text:
        return StanzaList(self.children)
    if self.is_stanza:
        return StanzaList([self])
    return StanzaList([])
Get the list of stanza entities.
Returns:
StanzaList: A list of stanza entities.
@property
def line_r(self):
    """
    Pick one line entity at random.

    Returns:
        Line: A randomly chosen member of self.lines, or None when there are
            no lines.
    """
    candidates = self.lines
    if not candidates:
        return None
    return random.choice(candidates)
Get a random line entity.
Returns:
Line: A random line entity, or None if no lines exist.
@property
def word_r(self):
    """
    Pick one word entity at random.

    Returns:
        WordToken: A randomly chosen member of self.words, or None when there
            are no words.
    """
    candidates = self.words
    if not candidates:
        return None
    return random.choice(candidates)
Get a random word entity.
Returns:
WordToken: A random word entity, or None if no words exist.
@cached_property
def lines(self):
    """
    Collect the line entities reachable from this entity.

    Returns:
        LineList: The children of a stanza, the entity itself when it is a
            line, otherwise every child of every stanza in self.stanzas.
    """
    from .texts import LineList

    if self.is_stanza:
        return LineList(self.children)
    if self.is_line:
        return LineList([self])
    collected = []
    for stanza in self.stanzas:
        for line in stanza.children:
            collected.append(line)
    return LineList(collected)
Get the list of line entities.
Returns:
LineList: A list of line entities.
@cached_property
def wordtokens(self):
    """
    Collect the word token entities reachable from this entity.

    Returns:
        WordTokenList: The children of a line, the entity itself when it is a
            word token, otherwise every child of every line in self.lines.
    """
    from .words import WordTokenList

    if self.is_line:
        return WordTokenList(self.children)
    if self.is_wordtoken:
        return WordTokenList([self])
    collected = []
    for line in self.lines:
        for token in line.children:
            collected.append(token)
    return WordTokenList(collected)
Get the list of word token entities.
Returns:
WordTokenList: A list of word token entities.
@property
def words(self):
    """
    Alias for wordtokens.

    Returns:
        WordTokenList: The same object that self.wordtokens yields.
    """
    tokens = self.wordtokens
    return tokens
Get the list of word token entities.
Returns:
WordTokenList: A list of word token entities.
@cached_property
def wordtypes(self):
    """
    Collect the word type entities reachable from this entity.

    Returns:
        WordTypeList: The children of a word token, the entity itself when it
            is a word type, otherwise every child of every token in
            self.wordtokens.
    """
    from .words import WordTypeList

    if self.is_wordtoken:
        return WordTypeList(self.children)
    if self.is_wordtype:
        return WordTypeList([self])
    collected = []
    for token in self.wordtokens:
        for wtype in token.children:
            collected.append(wtype)
    return WordTypeList(collected)
Get the list of word type entities.
Returns:
WordTypeList: A list of word type entities.
@cached_property
def wordforms(self):
    """
    Get the list of word form entities (the first form of each word type).

    Returns:
        WordFormList: For a word type, its first child only; for a word form,
            the entity itself; otherwise the first child of every word type
            in self.wordtypes that has children.
    """
    from .words import WordFormList

    if self.is_wordtype:
        # Only the first form is kept; wordforms_all exposes every form.
        o = self.children[:1]
    elif self.is_wordform:
        # A word form is its own single word form.
        o = [self]
    else:
        o = [wtype.children[0] for wtype in self.wordtypes if wtype.children]
    return WordFormList(o)
Get the list of word form entities.
Returns:
WordFormList: A list of word form entities.
@cached_property
def wordforms_nopunc(self):
    """
    Word forms whose parent is not flagged as punctuation.

    Returns:
        list: The members of self.wordforms, in order, for which
            parent.is_punc is falsy.
    """
    kept = []
    for form in self.wordforms:
        if form.parent.is_punc:
            continue
        kept.append(form)
    return kept
Get the list of word form entities, excluding punctuation.
Returns:
list: A list of word form entities, excluding punctuation.
@cached_property
def wordforms_all(self):
    """
    Get all word form entities, not just the first form of each word type.

    Returns:
        list: For a word type, its children; for a word form, a one-element
            list holding the entity itself; otherwise one entry per word type
            in self.wordtypes, each entry being that word type's children
            (i.e. the result is grouped per word type, not flattened).
    """
    if self.is_wordtype:
        o = self.children
    elif self.is_wordform:
        # elif keeps the fallback below from overwriting the WordType result.
        o = [self]
    else:
        o = [wtype.children for wtype in self.wordtypes]
    return o
Get the list of all word form entities.
Returns:
list: A list of all word form entities.
@cached_property
def syllables(self):
    """
    Get the list of syllable entities.

    Returns:
        SyllableList: The children of a word form, the entity itself when it
            is a syllable, otherwise every child of every word form in
            self.wordforms.
    """
    from .words import SyllableList

    if self.is_wordform:
        o = self.children
    elif self.is_syll:
        # elif keeps the fallback below from overwriting the WordForm result.
        o = [self]
    else:
        o = [syll for wf in self.wordforms for syll in wf.children]
    return SyllableList(o)
Get the list of syllable entities.
Returns:
SyllableList: A list of syllable entities.
@cached_property
def phonemes(self):
    """
    Get the list of phoneme entities.

    Returns:
        PhonemeList: The children of a syllable, the entity itself when it is
            a phoneme, otherwise every child of every syllable in
            self.syllables.
    """
    from .words import PhonemeList

    if self.is_syll:
        o = self.children
    elif self.is_phon:
        # elif keeps the fallback below from overwriting the Syllable result.
        o = [self]
    else:
        o = [phon for syll in self.syllables for phon in syll.children]
    return PhonemeList(o)
Get the list of phoneme entities.
Returns:
PhonemeList: A list of phoneme entities.
@cached_property
def text(self):
    """
    Look up the enclosing text entity.

    Returns:
        Text: Whatever get_parent("Text") yields: the matching ancestor, or
            None if not found.
    """
    owner = self.get_parent("Text")
    return owner
Get the parent text entity.
Returns:
Text: The parent text entity, or None if not found.
@cached_property
def stanza(self):
    """
    Look up the enclosing stanza entity.

    Returns:
        Stanza: Whatever get_parent("Stanza") yields: the matching ancestor,
            or None if not found.
    """
    owner = self.get_parent("Stanza")
    return owner
Get the parent stanza entity.
Returns:
Stanza: The parent stanza entity, or None if not found.
@cached_property
def line(self):
    """
    Look up the enclosing line entity.

    Returns:
        Line: Whatever get_parent("Line") yields: the matching ancestor, or
            None if not found.
    """
    owner = self.get_parent("Line")
    return owner
Get the parent line entity.
Returns:
Line: The parent line entity, or None if not found.
@cached_property
def wordtoken(self):
    """
    Look up the enclosing word token entity.

    Returns:
        WordToken: Whatever get_parent("WordToken") yields: the matching
            ancestor, or None if not found.
    """
    owner = self.get_parent("WordToken")
    return owner
Get the parent word token entity.
Returns:
WordToken: The parent word token entity, or None if not found.
@cached_property
def wordtype(self):
    """
    Look up the enclosing word type entity.

    Returns:
        WordType: Whatever get_parent("WordType") yields: the matching
            ancestor, or None if not found.
    """
    owner = self.get_parent("WordType")
    return owner
Get the parent word type entity.
Returns:
WordType: The parent word type entity, or None if not found.
@cached_property
def wordform(self):
    """
    Look up the enclosing word form entity.

    Returns:
        WordForm: Whatever get_parent("WordForm") yields: the matching
            ancestor, or None if not found.
    """
    owner = self.get_parent("WordForm")
    return owner
Get the parent word form entity.
Returns:
WordForm: The parent word form entity, or None if not found.
@cached_property
def syllable(self):
    """
    Look up the enclosing syllable entity.

    Returns:
        Syllable: Whatever get_parent("Syllable") yields: the matching
            ancestor, or None if not found.
    """
    owner = self.get_parent("Syllable")
    return owner
Get the parent syllable entity.
Returns:
Syllable: The parent syllable entity, or None if not found.
@cached_property
def i(self):
    """
    Get the index of the entity in its parent's children list.

    Returns:
        int: The 0-based position of the entity among parent.children, or
            None when there is no parent, the parent has no children, or the
            entity is not among them.
    """
    if self.parent is None:
        return None
    if not self.parent.children:
        return None
    try:
        return self.parent.children.index(self)
    except (ValueError, IndexError):
        # Sequence .index() signals "not present" with ValueError; IndexError
        # is still caught in case a custom children container raises it.
        return None
Get the index of the entity in its parent's children list.
Returns:
int: The index of the entity, or None if not found.
@cached_property
def num(self):
    """
    1-based counterpart of i.

    Returns:
        int: self.i + 1, or None when self.i is None.
    """
    if self.i is None:
        return None
    return self.i + 1
Get the 1-based index of the entity in its parent's children list.
Returns:
int: The 1-based index of the entity, or None if not found.
@cached_property
def next(self):
    """
    Sibling that follows this entity in its parent's children.

    Returns:
        Entity: parent.children[i + 1], or None when the entity has no index
            or is the last child.
    """
    position = self.i
    if position is not None:
        try:
            return self.parent.children[position + 1]
        except IndexError:
            # Already the last child: fall through to None.
            pass
    return None
Get the next sibling entity.
Returns:
Entity: The next sibling entity, or None if not found.
@cached_property
def prev(self):
    """
    Sibling that precedes this entity in its parent's children.

    Returns:
        Entity: parent.children[i - 1], or None when the entity has no index
            or is the first child.
    """
    position = self.i
    if position is None or position < 1:
        # Guard against i == 0: index -1 would wrap around to the last child.
        return None
    try:
        return self.parent.children[position - 1]
    except IndexError:
        return None
Get the previous sibling entity.
Returns:
Entity: The previous sibling entity, or None if not found.
@cached_property
def is_text(self):
    """
    Tell whether this entity's class is named "Text".

    Returns:
        bool: True when the class name is exactly "Text", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "Text"
Check if the entity is a text entity.
Returns:
bool: True if the entity is a text entity, False otherwise.
@cached_property
def is_stanza(self):
    """
    Tell whether this entity's class is named "Stanza".

    Returns:
        bool: True when the class name is exactly "Stanza", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "Stanza"
Check if the entity is a stanza entity.
Returns:
bool: True if the entity is a stanza entity, False otherwise.
@cached_property
def is_line(self):
    """
    Tell whether this entity's class is named "Line".

    Returns:
        bool: True when the class name is exactly "Line", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "Line"
Check if the entity is a line entity.
Returns:
bool: True if the entity is a line entity, False otherwise.
@cached_property
def is_wordtoken(self):
    """
    Tell whether this entity's class is named "WordToken".

    Returns:
        bool: True when the class name is exactly "WordToken", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "WordToken"
Check if the entity is a word token entity.
Returns:
bool: True if the entity is a word token entity, False otherwise.
@cached_property
def is_wordtype(self):
    """
    Tell whether this entity's class is named "WordType".

    Returns:
        bool: True when the class name is exactly "WordType", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "WordType"
Check if the entity is a word type entity.
Returns:
bool: True if the entity is a word type entity, False otherwise.
@cached_property
def is_wordform(self):
    """
    Tell whether this entity's class is named "WordForm".

    Returns:
        bool: True when the class name is exactly "WordForm", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "WordForm"
Check if the entity is a word form entity.
Returns:
bool: True if the entity is a word form entity, False otherwise.
@cached_property
def is_syll(self):
    """
    Tell whether this entity's class is named "Syllable".

    Returns:
        bool: True when the class name is exactly "Syllable", False otherwise.
    """
    class_name = type(self).__name__
    return class_name == "Syllable"
Check if the entity is a syllable entity.
Returns:
bool: True if the entity is a syllable entity, False otherwise.
@cached_property
def is_phon(self):
    """
    Tell whether this entity's class is named "PhonemeClass".

    Returns:
        bool: True when the class name is exactly "PhonemeClass", False
            otherwise.
    """
    class_name = type(self).__name__
    return class_name == "PhonemeClass"
Check if the entity is a phoneme entity.
Returns:
bool: True if the entity is a phoneme entity, False otherwise.
def children_from_cache(self):
    """
    Get the children of the entity from the cache.

    Returns:
        list: The children of the cached copy of this entity, or None when
            caching is disabled or nothing is cached for it.
    """
    if not caching_is_enabled():
        # Nothing to look up; return explicitly instead of touching an
        # unassigned result variable.
        return None
    res = self.from_cache()
    logger.trace(f"FOUND {res}")
    return None if res is None else res.children
Get the children of the entity from the cache.
Returns:
list: The list of child entities, or None if not found in the cache.
def get_key(self, key):
    """
    Turn an arbitrary key object into a cache key.

    Args:
        key: The key object.

    Returns:
        str: key.to_hash() when the object provides to_hash; otherwise
            hashstr(key) for a truthy key; a falsy key is returned unchanged.
    """
    if hasattr(key, "to_hash"):
        return key.to_hash()
    return hashstr(key) if key else key
Get a key for caching purposes.
Arguments:
- key: The key object.
Returns:
str: The hashed key.
def from_cache(self, obj=None, key=None, as_dict=False):
    """
    Fetch a cached object.

    Args:
        obj: The object whose key is looked up; defaults to this entity.
        key: An explicit cache key; when falsy it is derived with get_key(obj).
        as_dict (bool): If True, return the stored data as-is instead of
            passing it through from_json.

    Returns:
        Any: The cached object (or its raw stored data), or None when there is
            no key, use_cache is False, the key is absent, or the stored data
            is falsy.
    """
    target = self if obj is None else obj
    if not key:
        key = self.get_key(target)
    if not key or self.use_cache == False:
        return None
    store = self.get_cache()
    if key not in store:
        return None
    payload = store[key]
    if not payload:
        return None
    return payload if as_dict else from_json(payload)
Get an object from the cache.
Arguments:
- obj: The object to cache.
- key: The key for the cache.
- as_dict (bool): If True, return the cached data as a dictionary.
Returns:
Any: The cached object, or None if not found.
def get_cache(self):
    """
    Create the cache handle used by from_cache and cache.

    Returns:
        SimpleCache: A newly constructed SimpleCache instance.
    """
    store = SimpleCache()
    return store
Get the cache object.
Returns:
SimpleCache: The cache object.
def cache(self, key_obj=None, val_obj=None, key=None, force=False):
    """
    Store an object's JSON export in the cache.

    Args:
        key_obj: The object the cache key is derived from; defaults to this
            entity.
        val_obj: The object to store; defaults to key_obj.
        key: An explicit cache key; when falsy it is derived with
            get_key(key_obj).
        force (bool): If True, overwrite an entry that already exists.
    """
    if key_obj is None:
        key_obj = self
    if val_obj is None:
        val_obj = key_obj
    logger.trace(f"key_obj = {key_obj}")
    logger.trace(f"val_obj = {val_obj}")
    if not key:
        key = self.get_key(key_obj)
    store = self.get_cache()
    if not key:
        return
    if not force and key in store:
        # Already cached and no overwrite requested.
        return
    with logmap(f"saving object under key {key[:8]}"):
        with logmap("exporting to json", level="trace"):
            exported = val_obj.to_json()
        with logmap("uploading json to cache", level="trace"):
            store[key] = exported
Cache an object.
Arguments:
- key_obj: The object to use as the cache key.
- val_obj: The object to cache.
- key: An optional key for the cache.
- force (bool): If True, force the cache to be updated.
Inherited Members
- collections.UserList
- append
- insert
- pop
- remove
- clear
- copy
- count
- index
- reverse
- sort
- extend
class EntityList(Entity):
    """
    A list of Entity objects.
    """

    def __init__(self, children=[], parent=None, **kwargs):
        """
        Set up the list without going through Entity.__init__.

        Args:
            children (list): Items to hold; copied into a new list as-is.
            parent (Entity): The parent entity.
            **kwargs: Additional attributes to set on the entity.
        """
        self.parent = parent
        self.children = list(children)
        self._attrs = kwargs
        self._txt = None
        for name, value in kwargs.items():
            setattr(self, name, value)

    @cached_property
    def txt(self):
        """
        Text content of the list, which an EntityList does not have.

        Returns:
            None: Always returns None for EntityList objects.
        """
        return None
A list of Entity objects.
def __init__(self, children=[], parent=None, **kwargs):
    """
    Set up the list without going through Entity.__init__.

    Args:
        children (list): Items to hold; copied into a new list as-is.
        parent (Entity): The parent entity.
        **kwargs: Additional attributes to set on the entity.
    """
    self.parent = parent
    self.children = list(children)
    self._attrs = kwargs
    self._txt = None
    for name, value in kwargs.items():
        setattr(self, name, value)
Initialize an EntityList object.
Arguments:
- children (list): List of child entities.
- parent (Entity): The parent entity.
- **kwargs: Additional attributes to set on the entity.
@cached_property
def txt(self):
    """
    Text content of the list, which an EntityList does not have.

    Returns:
        None: Always returns None for EntityList objects.
    """
    return None
Get the text content of the entity list.
Returns:
None: Always returns None for EntityList objects.
Inherited Members
- Entity
- child_type
- is_parseable
- index_name
- prefix
- list_type
- cached_properties_to_clear
- use_cache
- sep
- to_hash
- html
- key
- hash
- to_json
- save
- render
- from_json
- attrs
- prefix_attrs
- data
- l
- clear_cached_properties
- inspect
- ld
- child_class
- get_ld
- get_df
- df
- get_parent
- stanzas
- line_r
- word_r
- lines
- wordtokens
- words
- wordtypes
- wordforms
- wordforms_nopunc
- wordforms_all
- syllables
- phonemes
- text
- stanza
- line
- wordtoken
- wordtype
- wordform
- syllable
- i
- num
- next
- prev
- is_text
- is_stanza
- is_line
- is_wordtoken
- is_wordtype
- is_wordform
- is_syll
- is_phon
- children_from_cache
- get_key
- from_cache
- get_cache
- cache
- collections.UserList
- append
- insert
- pop
- remove
- clear
- copy
- count
- index
- reverse
- sort
- extend