# import
import prosodic
sonnetV = prosodic.Text(
"""Those hours, that with gentle work did frame
The lovely gaze where every eye doth dwell,
Will play the tyrants to the very same
And that unfair which fairly doth excel;
For never-resting time leads summer on
To hideous winter, and confounds him there;
Sap checked with frost, and lusty leaves quite gone,
Beauty o’er-snowed and bareness every where:
Then were not summer’s distillation left,
A liquid prisoner pent in walls of glass,
Beauty’s effect with beauty were bereft,
Nor it, nor no remembrance what it was:
But flowers distill’d, though they with winter meet,
Leese but their show; their substance still lives sweet."""
)
Architecture
Texts
Code contained in prosodic.texts.
Reading texts
Loading by string
You can load any text with a string:
Loading texts by filename
You can also read texts (especially larger ones) by filename:
import os
shakespeare_sonnets_filename = os.path.join(
    prosodic.PATH_REPO, 'corpora','corppoetry_en','en.shakespeare.txt'
)
# read a text by filename
sonnets = prosodic.Text(fn=shakespeare_sonnets_filename)
[2.46s] Building long text: 0%| | 0/20317 [00:00<?, ?it/s][2.46s] Building long text: 4%|▎ | 761/20317 [00:00<00:02, 7609.15it/s][2.46s] Building long text: 7%|▋ | 1522/20317 [00:00<00:04, 3936.37it/s][2.46s] Building long text: 11%|█▏ | 2321/20317 [00:00<00:03, 5162.95it/s][2.46s] Building long text: 16%|█▌ | 3161/20317 [00:00<00:02, 6139.10it/s][2.46s] Building long text: 20%|█▉ | 4026/20317 [00:00<00:02, 6894.51it/s][2.46s] Building long text: 24%|██▍ | 4892/20317 [00:00<00:02, 7424.50it/s][2.46s] Building long text: 28%|██▊ | 5787/20317 [00:00<00:01, 7882.81it/s][2.46s] Building long text: 33%|███▎ | 6688/20317 [00:00<00:01, 8219.73it/s][2.46s] Building long text: 37%|███▋ | 7570/20317 [00:01<00:02, 5213.52it/s][2.46s] Building long text: 41%|████▏ | 8390/20317 [00:01<00:02, 5840.28it/s][2.46s] Building long text: 46%|████▌ | 9252/20317 [00:01<00:01, 6481.35it/s][2.46s] Building long text: 50%|████▉ | 10116/20317 [00:01<00:01, 7014.78it/s][2.46s] Building long text: 54%|█████▍ | 11005/20317 [00:01<00:01, 7504.10it/s][2.46s] Building long text: 59%|█████▊ | 11891/20317 [00:01<00:01, 7871.11it/s][2.46s] Building long text: 63%|██████▎ | 12747/20317 [00:01<00:00, 8056.46it/s][2.46s] Building long text: 67%|██████▋ | 13614/20317 [00:01<00:00, 8231.36it/s][2.46s] Building long text: 71%|███████▏ | 14494/20317 [00:02<00:00, 8395.19it/s][2.46s] Building long text: 76%|███████▌ | 15355/20317 [00:02<00:00, 5247.51it/s][2.46s] Building long text: 80%|███████▉ | 16204/20317 [00:02<00:00, 5913.21it/s][2.46s] Building long text: 84%|████████▍ | 17069/20317 [00:02<00:00, 6535.04it/s][2.46s] Building long text: 89%|████████▊ | 17982/20317 [00:02<00:00, 7168.13it/s][2.46s] Building long text: 93%|█████████▎| 18875/20317 [00:02<00:00, 7624.63it/s][2.46s] Building long text: 97%|█████████▋| 19751/20317 [00:02<00:00, 7929.59it/s]
Displaying texts
In a notebook environment, text objects will display a by-syllable dataframe of the text structure they contain, which is stored at text.df:
# these will display the same, but former actually points to the dataframe
sonnetV.df
sonnetV
wordtoken_is_punc | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stanza_num | line_num | linepart_num | sent_num | sentpart_num | wordtoken_num | wordtoken_txt | wordtype_txt | wordform_num | wordform_ipa_origin | syll_num | syll_txt | syll_ipa | |
1 | 1 | 1 | 1 | 1 | 1 | Those | Those | 1 | dict | 1 | Those | ðoʊz | 0 |
2 | hours | hours | 1 | dict | 1 | hours | 'aʊrz | 0 | |||||
2 | dict | 1 | ho | 'aʊ | 0 | ||||||||
2 | urs | ɛːz | 0 | ||||||||||
3 | , | , | 0 | 0 | 1 | ||||||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
14 | 30 | 1 | 17 | 121 | substance | substance | 1 | dict | 2 | tance | stəns | 0 | |
122 | still | still | 1 | dict | 1 | still | 'stɪl | 0 | |||||
123 | lives | lives | 1 | dict | 1 | lives | 'lɪvz | 0 | |||||
124 | sweet | sweet | 1 | dict | 1 | sweet | 'swiːt | 0 | |||||
125 | . | . | 0 | 0 | 1 |
187 rows × 1 columns
Stanzas
Accessing stanzas
Stanza separations are detected by two line breaks in the input text. You can access stanza objects through a text object:
assert len(sonnets.stanzas) == 154 # number of shakespeare sonnets
# can iterate over them simply by iterating over text object:
for stanza in sonnets:
pass
# you can also reach stanzas by .stanza###
sonnets.stanza154.df
wordtoken_is_punc | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stanza_num | line_num | linepart_num | sent_num | sentpart_num | wordtoken_num | wordtoken_txt | wordtype_txt | wordform_num | wordform_ipa_origin | syll_num | syll_txt | syll_ipa | |
154 | 2142 | 4723 | 521 | 2582 | 20194 | \n\nThe | The | 1 | dict | 1 | The | ðə | 0 |
20195 | little | little | 1 | dict | 1 | lit | 'lɪ | 0 | |||||
2 | tle | təl | 0 | ||||||||||
20196 | Love | Love | 1 | dict | 1 | Love | 'lʌv | 0 | |||||
20197 | - | - | 0 | 0 | 1 | ||||||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
2155 | 4748 | 522 | 2594 | 20314 | cools | cools | 1 | dict | 1 | cools | 'kuːlz | 0 | |
20315 | not | not | 1 | dict | 1 | not | nɑt | 0 | |||||
2 | dict | 1 | not | 'nɑt | 0 | ||||||||
20316 | love | love | 1 | dict | 1 | love | 'lʌv | 0 | |||||
20317 | . | . | 0 | 0 | 1 |
173 rows × 1 columns
Displaying stanzas
By default, sonnets will display parsed:
sonnets.stanza154
[8.68s] Parsing lineparts [8x]: 0%| | 0/19 [00:00<?, ?it/s][8.68s] Parsing lineparts [8x]: 5%|▌ | 1/19 [00:00<00:02, 8.30it/s][8.68s] Parsing lineparts [8x]: 37%|███▋ | 7/19 [00:00<00:00, 36.20it/s][8.68s] Parsing lineparts [8x]: 58%|█████▊ | 11/19 [00:00<00:00, 26.34it/s][8.68s] Parsing lineparts [8x]: 79%|███████▉ | 15/19 [00:00<00:00, 24.34it/s][8.68s] Parsing lineparts [8x]: 95%|█████████▍| 18/19 [00:00<00:00, 21.88it/s]
- The little Love-god lying once asleep
- Laid by his side his heart-inflaming brand,
- Whilst many nymphs that vow'd chaste life to keep
- Came tripping by; but in her maiden hand
- The fairest votary took up that fire
- Which many legions of true hearts had warm'd;
- And so the general of hot desire
- Was sleeping by a virgin hand disarm'd.
- This brand she quenched in a cool well by,
- Which from Love's fire took heat perpetual,
- Growing a bath and healthful remedy
- For men diseased; but I, my mistress' thrall,
- Came there for cure, and this by that I prove,
- Love's fire heats water, water cools not love.
The red indicates violations and allows for nice comparison with other poems and parses. You can display the same thing on a text with text.render():
sonnetV.render()
[1.26s] Parsing lineparts [8x]: 0%| | 0/20 [00:00<?, ?it/s]
[1.26s] Parsing lineparts [8x]: 40%|████ | 8/20 [00:00<00:00, 79.09it/s]
[1.26s] Parsing lineparts [8x]: 95%|█████████▌| 19/20 [00:00<00:00, 85.21it/s]
- Those hours, that with gentle work did frame
- The lovely gaze where every eye doth dwell,
- Will play the tyrants to the very same
- And that unfair which fairly doth excel;
- For never-resting time leads summer on
- To hideous winter, and confounds him there;
- Sap checked with frost, and lusty leaves quite gone,
- Beauty o'er-snowed and bareness every where:
- Then were not summer's distillation left,
- A liquid prisoner pent in walls of glass,
- Beauty's effect with beauty were bereft,
- Nor it, nor no remembrance what it was:
- But flowers distill'd, though they with winter meet,
- Leese but their show; their substance still lives sweet.
Lines
Lines are important objects because (at present) they are the only objects the parser actually treats as the unit of metrical parsing.
You can access them in a few ways:
# you can also reach them by line number
sonnetV.line14
# which are relative to the stanza
sonnets.stanza5.line14
wordtoken_is_punc | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stanza_num | line_num | linepart_num | sent_num | sentpart_num | wordtoken_num | wordtoken_txt | wordtype_txt | wordform_num | wordform_ipa_origin | syll_num | syll_txt | syll_ipa | |
5 | 70 | 150 | 19 | 81 | 622 | \nLeese | Leese | 1 | dict | 1 | Leese | 'liːs | 0 |
623 | but | but | 1 | dict | 1 | but | bət | 0 | |||||
624 | their | their | 1 | dict | 1 | their | ðɛr | 0 | |||||
625 | show | show | 1 | dict | 1 | show | 'ʃoʊ | 0 | |||||
626 | ; | ; | 0 | 0 | 1 | ||||||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ||
151 | 19 | 82 | 628 | substance | substance | 1 | dict | 2 | tance | stəns | 0 | ||
629 | still | still | 1 | dict | 1 | still | 'stɪl | 0 | |||||
630 | lives | lives | 1 | dict | 1 | lives | 'lɪvz | 0 | |||||
631 | sweet | sweet | 1 | dict | 1 | sweet | 'swiːt | 0 | |||||
632 | . | . | 0 | 0 | 1 |
12 rows × 1 columns
You can also create them directly:
line = prosodic.Text("A horse, a horse, my kingdom for a horse!").line1
line
wordtoken_is_punc | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
stanza_num | line_num | linepart_num | sent_num | sentpart_num | wordtoken_num | wordtoken_txt | wordtype_txt | wordform_num | wordform_ipa_origin | syll_num | syll_txt | syll_ipa | |
1 | 1 | 1 | 1 | 1 | 1 | A | A | 1 | dict | 1 | A | eɪ | 0 |
2 | horse | horse | 1 | dict | 1 | horse | 'hɔːrs | 0 | |||||
3 | , | , | 0 | 0 | 1 | ||||||||
2 | 1 | 2 | 4 | a | a | 1 | dict | 1 | a | eɪ | 0 | ||
5 | horse | horse | 1 | dict | 1 | horse | 'hɔːrs | 0 | |||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ||
3 | 1 | 3 | 8 | kingdom | kingdom | 1 | dict | 2 | dom | dəm | 0 | ||
9 | for | for | 1 | dict | 1 | for | fɔːr | 0 | |||||
10 | a | a | 1 | dict | 1 | a | eɪ | 0 | |||||
11 | horse | horse | 1 | dict | 1 | horse | 'hɔːrs | 0 | |||||
12 | ! | ! | 0 | 0 | 1 |
13 rows × 1 columns