| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | from __future__ import absolute_import |
| | from __future__ import print_function |
| | from __future__ import unicode_literals |
| | from collections import deque |
| | from itertools import chain |
| | from pprint import pformat |
| | from tempfile import TemporaryFile |
| | import base64 |
| | import logging |
| | import sys |
| |
|
| | from . import binmodel |
| | from . import filestructure |
| | from .binmodel.controls import SectionDef |
| | from .binmodel.controls import TableControl |
| | from .binmodel.controls import GShapeObjectControl |
| | from .binmodel import BinData |
| | from .binmodel import ListHeader |
| | from .binmodel import Paragraph |
| | from .binmodel import Text |
| | from .binmodel import ShapeComponent |
| | from .binmodel import TableBody |
| | from .binmodel import TableCell |
| | from .binmodel import ParaText |
| | from .binmodel import ParaLineSeg |
| | from .binmodel import ParaCharShape |
| | from .binmodel import LineSeg |
| | from .binmodel import ParaRangeTag |
| | from .binmodel import Field |
| | from .binmodel import ControlChar |
| | from .binmodel import Control |
| | from .charsets import tokenize_unicode_by_lang |
| | from .dataio import Struct |
| | from .filestructure import VERSION |
| | from .treeop import STARTEVENT, ENDEVENT |
| | from .treeop import prefix_event |
| | from .treeop import build_subtree |
| | from .treeop import tree_events |
| | from .treeop import tree_events_multi |
| | from .xmlformat import startelement |
| | from .xmlformat import xmlevents_to_bytechunks |
| |
|
| |
|
# Python 2/3 compatibility: on Python 3 the names ``basestring`` and
# ``unichr`` do not exist, so alias them to ``str`` and ``chr``.
PY3 = sys.version_info[0] == 3
if PY3:
    basestring = str
    unichr = chr


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| |
|
| |
|
def give_elements_unique_id(event_prefixed_mac):
    ''' Number selected elements with per-kind sequential ids.

    On every STARTEVENT whose model is Paragraph, TableControl,
    GShapeObjectControl or ShapeComponent, the element's attributes dict
    is updated in place with ``paragraph_id``, ``table_id``, ``gshape_id``
    or ``shape_id`` respectively. Each counter starts at 0 and is
    independent of the others. All events are passed through unchanged.

    :param event_prefixed_mac: iterable of
        (event, (model, attributes, context))
    :returns: generator of the same (event, item) pairs
    '''
    counters = dict(paragraph_id=0, table_id=0, gshape_id=0, shape_id=0)
    for event, item in event_prefixed_mac:
        model, attributes, _ = item
        if event == STARTEVENT:
            # Pick the counter that belongs to this model, if any.
            if model == Paragraph:
                key = 'paragraph_id'
            elif model == TableControl:
                key = 'table_id'
            elif model == GShapeObjectControl:
                key = 'gshape_id'
            elif model == ShapeComponent:
                key = 'shape_id'
            else:
                key = None
            if key is not None:
                attributes[key] = counters[key]
                counters[key] += 1
        yield event, item
| |
|
| |
|
def make_ranged_shapes(shapes):
    ''' Convert (start, shape) pairs into ((start, end), shape) ranges.

    Each range ends where the next item starts; the last range ends at
    0x7fffffff. An empty input yields nothing (previously the trailing
    yield referenced an unbound loop variable and raised).

    :param shapes: iterable of (start_position, shape) pairs; consumed
        in a single pass, so a generator is fine
    :returns: generator of ((start, end), shape)
    '''
    previous = None
    for current in shapes:
        if previous is not None:
            yield (previous[0], current[0]), previous[1]
        previous = current
    # Only close the final range when there was at least one item.
    if previous is not None:
        yield (previous[0], 0x7fffffff), previous[1]
| |
|
| |
|
def split_and_shape(chunks, ranged_shapes):
    ''' Split ranged chunks at shape boundaries and tag them with shapes.

    Walks ``chunks`` and ``ranged_shapes`` in parallel. A chunk that
    extends past the end of the current shape range is cut at that
    boundary (by slicing the chunk); the remainder is carried over to the
    following shape range.

    :param chunks: iterable of ((start, end), attr, chunk). Any iterable
        is accepted (it is wrapped with ``iter()``), not only iterators.
    :param ranged_shapes: iterable of ((start, end), shape)
    :returns: generator of ((start, end), (shape, attr), chunk)
    :raises AssertionError: if a chunk starts before the current shape
        range, i.e. the two inputs are not aligned/ordered
    '''
    # next() below needs an iterator; lists and tuples would otherwise fail.
    chunks = iter(chunks)
    try:
        (chunk_start, chunk_end), chunk_attr, chunk = next(chunks)
    except StopIteration:
        return
    for (shape_start, shape_end), shape in ranged_shapes:
        while True:
            # A chunk must never begin before the current shape range.
            assert shape_start <= chunk_start, \
                'chunk starts before the current shape range'

            # The chunk lies entirely after this shape: try the next shape.
            if shape_end <= chunk_start:
                break

            assert chunk_start < shape_end

            if shape_end < chunk_end:
                # The chunk crosses the shape boundary: cut it there and
                # keep the tail for the next shape range.
                cut = shape_end - chunk_start
                remainder = ((shape_end, chunk_end), chunk[cut:])
                chunk_end = shape_end
                chunk = chunk[:cut]
            else:
                remainder = None

            assert chunk_end <= shape_end
            yield (chunk_start, chunk_end), (shape, chunk_attr), chunk

            if remainder is not None:
                # chunk_attr is kept: the tail belongs to the same chunk.
                (chunk_start, chunk_end), chunk = remainder
                continue

            try:
                (chunk_start, chunk_end), chunk_attr, chunk = next(chunks)
            except StopIteration:
                return
| |
|
| |
|
def line_segmented(chunks, ranged_linesegs):
    ''' Group chunks into lines, one group per line segment.

    The chunks are first split at line-segment boundaries with
    `split_and_shape`; consecutive pieces tagged with the same lineseg
    object (compared by identity) are collected into one list.

    :param chunks: iterable of ((start, end), attr, chunk)
    :param ranged_linesegs: iterable of ((start, end), lineseg)
    :returns: generator of (lineseg, [((start, end), attr, chunk), ...])
    '''
    current_lineseg = None
    pending = None
    pieces = split_and_shape(chunks, ranged_linesegs)
    for (start, end), (lineseg, chunk_attr), chunk in pieces:
        if lineseg is not current_lineseg:
            # A new line begins: flush the previous one, if any.
            if pending is not None:
                yield current_lineseg, pending
            pending = []
        pending.append(((start, end), chunk_attr, chunk))
        current_lineseg = lineseg
    if pending is not None:
        yield current_lineseg, pending
| |
|
| |
|
def make_texts_linesegmented_and_charshaped(event_prefixed_mac):
    ''' lineseg/charshaped text chunks

    For each Paragraph, the STARTEVENTs of its ParaText, ParaCharShape,
    ParaLineSeg and ParaRangeTag records are collected (and removed from
    the stream, together with their ENDEVENTs). When the Paragraph ends,
    the merged LineSeg/Text/ControlChar events produced by
    `merge_paragraph_text_charshape_lineseg` are emitted just before the
    Paragraph's ENDEVENT. Every other event passes through.
    '''
    collected_stack = []
    for event, item in event_prefixed_mac:
        model, attributes, context = item
        if model is Paragraph:
            if event == STARTEVENT:
                collected_stack.append({})
                yield STARTEVENT, item
                continue

            collected = collected_stack[-1]
            paratext = collected.get(ParaText)
            paracharshape = collected.get(ParaCharShape)
            paralineseg = collected.get(ParaLineSeg)
            # A collected ParaRangeTag is stored but not used here.
            if paratext is None:
                # No text record: substitute a single empty chunk.
                paratext = (ParaText,
                            dict(chunks=[((0, 0), '')]),
                            dict(context))
            merged = merge_paragraph_text_charshape_lineseg(paratext,
                                                            paracharshape,
                                                            paralineseg)
            for merged_event in merged:
                yield merged_event

            yield ENDEVENT, (model, attributes, context)
            collected_stack.pop()
        elif model in (ParaText, ParaCharShape, ParaLineSeg, ParaRangeTag):
            if event == STARTEVENT:
                collected_stack[-1][model] = (model, attributes, context)
        else:
            yield event, (model, attributes, context)
| |
|
| |
|
def merge_paragraph_text_charshape_lineseg(paratext, paracharshape,
                                           paralineseg):
    ''' Merge a paragraph's text, char shapes and line segments.

    The text chunks are split at char-shape boundaries and then grouped
    per line segment; each group is emitted as a LineSeg element wrapping
    the Text/ControlChar events of that line.

    :param paratext: (model, attributes, context) with
        ``attributes['chunks']``
    :param paracharshape: (model, attributes, context) with
        ``attributes['charshapes']``; must not be None
    :param paralineseg: (model, attributes, context) with
        ``attributes['linesegs']``, or a false value when absent
    :returns: generator of (event, (model, attributes, context))
    '''
    _, text_attributes, text_context = paratext

    plain_chunks = ((span, None, chunk)
                    for span, chunk in text_attributes['chunks'])
    charshapes = paracharshape[1]['charshapes']
    shaped_chunks = split_and_shape(plain_chunks,
                                    make_ranged_shapes(charshapes))

    if not paralineseg:
        # No line segment record was collected for this paragraph: fall
        # back to one zeroed line segment starting at position 0.
        dummy = dict(chpos=0, y=0, height=0, height2=0, height85=0,
                     space_below=0, x=0, width=0, a8=0, flags=0)
        lineseg_attributes = dict(linesegs=[dummy])
        lineseg_context = dict()
    else:
        lineseg_attributes = paralineseg[1]
        lineseg_context = paralineseg[2]

    positioned = ((entry['chpos'], entry)
                  for entry in lineseg_attributes['linesegs'])
    lines = line_segmented(shaped_chunks, make_ranged_shapes(positioned))
    for lineseg_content, line_chunks in lines:
        wrapper = (LineSeg, lineseg_content, lineseg_context)
        inner = range_shaped_textchunk_events(text_context, line_chunks)
        for wrapped_event in wrap_modelevents(wrapper, inner):
            yield wrapped_event
| |
|
| |
|
def range_shaped_textchunk_events(paratext_context, range_shaped_textchunks):
    ''' Turn shaped text chunks into Text / ControlChar events.

    String chunks become Text elements; dict chunks (carrying a ``code``
    key) become ControlChar elements. Chunks of any other type are
    skipped. Each element is emitted as a STARTEVENT immediately followed
    by its ENDEVENT.

    :param paratext_context: context attached to every emitted element
    :param range_shaped_textchunks: iterable of
        ((start, end), (charshape_id, attr), chunk)
    '''
    for (_, _), (shape, _), chunk in range_shaped_textchunks:
        if isinstance(chunk, basestring):
            model = Text
            element_attributes = dict(text=chunk, charshape_id=shape)
        elif isinstance(chunk, dict):
            code = chunk['code']
            uch = unichr(code)
            name = ControlChar.get_name_by_code(code)
            kind = ControlChar.kinds[uch]
            model = ControlChar
            element_attributes = dict(name=name,
                                      code=code,
                                      kind=kind,
                                      charshape_id=shape)
            # Only tab, line feed and carriage return carry their char.
            if code in (0x9, 0xa, 0xd):
                element_attributes['char'] = uch
        else:
            continue
        element = (model, element_attributes, paratext_context)
        yield STARTEVENT, element
        yield ENDEVENT, element
| |
|
| |
|
def wrap_section(event_prefixed_mac, sect_id=None):
    ''' wrap a section with SectionDef

    Events seen before the first SectionDef STARTEVENT are buffered. When
    the SectionDef starts, its subtree is built with `build_subtree` from
    the same event source; the SectionDef STARTEVENT and its children are
    emitted first, then the buffered events, then all remaining events.
    A final ENDEVENT for the SectionDef closes the stream, so the section
    content ends up nested inside the SectionDef element.

    NOTE(review): `event_prefixed_mac` is shared with `build_subtree`, so
    it presumably has to be an iterator rather than a re-iterable
    sequence -- confirm against `treeop.build_subtree`.

    :param sect_id: if not None, stored as ``section_id`` in the
        attributes of the SectionDef start item
    '''
    preamble = []
    sectiondef = None
    found = False
    for event, item in event_prefixed_mac:
        if found:
            yield event, item
            continue

        model, attributes, context = item
        if not (model is SectionDef and event is STARTEVENT):
            preamble.append((event, item))
            continue

        sectiondef, children = build_subtree(event_prefixed_mac)
        if sect_id is not None:
            attributes['section_id'] = sect_id
        yield STARTEVENT, sectiondef
        for child_event in tree_events_multi(children):
            yield child_event
        for buffered in preamble:
            yield buffered
        found = True
    yield ENDEVENT, sectiondef
| |
|
| |
|
class ColumnSet:
    ''' Marker model for the element inserted by `wrap_columns`. '''
| |
|
| |
|
def wrap_columns(event_prefixed_mac):
    ''' Wrap runs of paragraphs with ColumnSet elements.

    A ColumnSet is opened before every Paragraph whose ``split`` flags
    have ``new_columnsdef`` set, closing the ColumnSet currently open at
    that level first. An open ColumnSet is also closed when the element
    that encloses it ends. All input events are passed through.
    '''
    open_elements = []

    for event, item in event_prefixed_mac:
        model, attributes, context = item
        starting = event is STARTEVENT

        if model is not Paragraph:
            if starting:
                open_elements.append(item)
            else:
                # The innermost open element is not the one ending now,
                # so it has to be a ColumnSet: close it first.
                if model != open_elements[-1][0]:
                    assert open_elements[-1][0] is ColumnSet
                    yield ENDEVENT, open_elements.pop()
                open_elements.pop()
        elif starting:
            split = Paragraph.SplitFlags(attributes['split'])
            if split.new_columnsdef:
                if open_elements[-1][0] is ColumnSet:
                    yield ENDEVENT, open_elements.pop()

                columnset = (ColumnSet, {}, {})
                open_elements.append(columnset)
                yield STARTEVENT, columnset

        yield event, item
| |
|
| |
|
def make_extended_controls_inline(event_prefixed_mac, stack=None):
    ''' inline extended-controls into paragraph texts

    Control subtrees found under a paragraph are taken out of the stream
    and queued on that paragraph; `meci_controlchar` re-inserts them at
    the position of the matching extended ControlChar.

    :param stack: stack of per-paragraph dicts; a fresh list is used when
        None. It is passed explicitly on the recursive call made by
        `meci_controlchar`.
    '''
    paragraphs = stack if stack is not None else []
    for event, item in event_prefixed_mac:
        model, attributes, context = item
        if model is Paragraph:
            produced = meci_paragraph(event, paragraphs, item)
        elif model is ControlChar:
            produced = meci_controlchar(event, paragraphs, item, attributes)
        elif issubclass(model, Control) and event == STARTEVENT:
            # Detach the whole control subtree and queue it on the
            # enclosing paragraph; nothing is emitted for it here.
            subtree = build_subtree(event_prefixed_mac)
            paragraphs[-1].setdefault(Control, []).append(subtree)
            produced = ()
        else:
            produced = ((event, item),)
        for produced_event in produced:
            yield produced_event
| |
|
| |
|
def meci_paragraph(event, stack, item):
    ''' Track paragraph nesting for `make_extended_controls_inline`.

    Pushes a fresh dict on `stack` when a paragraph starts and pops it
    after the paragraph's ENDEVENT has been emitted. The event itself is
    passed through.
    '''
    if event != STARTEVENT:
        yield ENDEVENT, item
        stack.pop()
        return
    stack.append({})
    yield STARTEVENT, item
| |
|
| |
|
def meci_controlchar(event, stack, item, attributes):
    ''' Handle a ControlChar event for `make_extended_controls_inline`.

    Nothing is emitted for an ENDEVENT. On a STARTEVENT, an EXTENDED
    control char is replaced by the events of the oldest control subtree
    queued on the current paragraph; any other control char is emitted as
    a STARTEVENT/ENDEVENT pair.
    '''
    if event is not STARTEVENT:
        return

    if attributes['kind'] is not ControlChar.EXTENDED:
        yield STARTEVENT, item
        yield ENDEVENT, item
        return

    queued_controls = stack[-1].get(Control)
    subtree_events = tree_events(*queued_controls.pop(0))
    # Emit the subtree's first event directly: fed through
    # make_extended_controls_inline, a Control STARTEVENT would be
    # detached and queued again instead of being emitted.
    yield next(subtree_events)

    for nested in make_extended_controls_inline(subtree_events, stack):
        yield nested
| |
|
| |
|
def make_paragraphs_children_of_listheader(event_prefixed_mac,
                                           parentmodel=ListHeader,
                                           childmodel=Paragraph):
    ''' make paragraphs children of the listheader

    The ENDEVENT of a `parentmodel` element is dropped and re-emitted
    later, so that following `childmodel` siblings end up nested inside
    it. The deferred ENDEVENT is emitted either when a sibling that is
    not a `childmodel` starts at the parent's level, or when the element
    enclosing the parent ends.

    :param parentmodel: model class (subclasses included) to extend
    :param childmodel: model class of the siblings to adopt
    '''
    open_parents = []
    depth = 0
    for event, item in event_prefixed_mac:
        model, attributes, context = item
        starting = event is STARTEVENT
        ending = event is ENDEVENT
        if starting:
            depth += 1

        if open_parents:
            parent_depth = open_parents[-1][0]
            if starting:
                must_close = (parent_depth == depth
                              and model is not childmodel)
            else:
                must_close = ending and parent_depth - 1 == depth
            if must_close:
                _, parent_item = open_parents.pop()
                yield ENDEVENT, parent_item

        if not issubclass(model, parentmodel):
            yield event, item
        elif starting:
            open_parents.append((depth, item))
            yield event, item
        # else: the parent's own ENDEVENT is swallowed and emitted later.

        if ending:
            depth -= 1
| |
|
| |
|
def match_field_start_end(event_prefixed_mac):
    ''' Pair Field starts with FIELD_END control chars.

    Field elements, LineSeg elements and ControlChar elements named
    ``FIELD_END`` are dispatched to `mfse_field`, `mfse_lineseg` and
    `mfse_field_end`, which share one stack of open fields. Every other
    event passes through unchanged.
    '''
    open_fields = []
    for event, item in event_prefixed_mac:
        model, attributes, context = item
        if issubclass(model, Field):
            produced = mfse_field(event, open_fields, item)
        elif model is LineSeg:
            produced = mfse_lineseg(event, open_fields, item)
        elif model is ControlChar and attributes['name'] == 'FIELD_END':
            produced = mfse_field_end(event, open_fields, item)
        else:
            produced = ((event, item),)
        for produced_event in produced:
            yield produced_event
| |
|
| |
|
def mfse_field(event, stack, item):
    ''' Open a field: push it on `stack` and emit its STARTEVENT.

    The field's own ENDEVENT is dropped; the closing event is produced by
    `mfse_field_end` instead.
    '''
    if event is not STARTEVENT:
        return
    stack.append(item)
    yield event, item
| |
|
| |
|
def mfse_lineseg(event, stack, item):
    ''' Keep open fields properly nested across LineSeg boundaries.

    Before a LineSeg ends, every open field is closed (innermost first);
    right after a LineSeg starts, every open field is re-opened
    (outermost first). The `stack` itself is not modified.
    '''
    if event is STARTEVENT:
        yield event, item
        # re-enter the open fields after the LineSeg starts
        for open_field in stack:
            yield STARTEVENT, open_field
    elif event is ENDEVENT:
        # leave the open fields before the LineSeg ends
        for open_field in reversed(stack):
            yield ENDEVENT, open_field
        yield event, item
| |
|
| |
|
def mfse_field_end(event, stack, item):
    ''' Close the innermost open field at a FIELD_END control char.

    Only the control char's ENDEVENT triggers anything: the most recently
    opened field is popped from `stack` and its ENDEVENT is emitted. The
    control char itself is never emitted. A warning is logged when no
    field is open.
    '''
    if event is not ENDEVENT:
        return
    if stack:
        yield event, stack.pop()
    else:
        logger.warning('unmatched field end')
| |
|
| |
|
class TableRow:
    ''' Marker model for the row element inserted by `rstbody_tablecell`. '''
| |
|
| |
|
# Bit flags attached to each table cell: whether a TableRow has to be
# opened before the cell and/or closed after it.
ROW_OPEN = 1 << 0
ROW_CLOSE = 1 << 1
| |
|
| |
|
def restructure_tablebody(event_prefixed_mac):
    ''' Group table columns in each rows and wrap them with TableRow.

    TableBody and TableCell events are dispatched to `rstbody_tablebody`
    and `rstbody_tablecell`, which share one stack of open tables; every
    other event passes through unchanged.
    '''
    open_tables = []
    for event, item in event_prefixed_mac:
        model, attributes, context = item
        if model is TableBody:
            produced = rstbody_tablebody(event, open_tables, item,
                                         attributes, context)
        elif model is TableCell:
            produced = rstbody_tablecell(event, open_tables, item)
        else:
            produced = ((event, item),)
        for produced_event in produced:
            yield produced_event
| |
|
| |
|
def rstbody_tablebody(event, stack, item, attributes, context):
    ''' Prepare / discard the per-cell row flags of a TableBody.

    On STARTEVENT the ``rowcols`` attribute is removed from `attributes`
    and expanded into a deque holding one flag per cell (ROW_OPEN for the
    first cell of a row, ROW_CLOSE for the last, both for a one-cell row,
    0 otherwise). The deque is pushed on `stack` together with the table
    context. On ENDEVENT that entry is popped again. The event itself is
    passed through in both cases.
    '''
    if event is not STARTEVENT:
        yield event, item
        stack.pop()
        return

    cell_flags = deque()
    for cols in attributes.pop('rowcols'):
        if cols == 1:
            cell_flags.append(ROW_OPEN | ROW_CLOSE)
            continue
        cell_flags.append(ROW_OPEN)
        cell_flags.extend([0] * (cols - 2))
        cell_flags.append(ROW_CLOSE)
    stack.append((context, cell_flags))
    yield event, item
| |
|
| |
|
def rstbody_tablecell(event, stack, item):
    ''' Emit a TableCell event, opening/closing its TableRow as flagged.

    The flags of the current table (top of `stack`) decide whether a
    TableRow STARTEVENT precedes the cell's STARTEVENT and whether a
    TableRow ENDEVENT follows the cell's ENDEVENT. The cell's flag is
    consumed when the cell ends. The row context is a copy of the table
    context.
    '''
    table_context, cell_flags = stack[-1]
    row_context = dict(table_context)
    # Peek only: the flag is still needed when this cell ends.
    if event is STARTEVENT and cell_flags[0] & ROW_OPEN:
        yield STARTEVENT, (TableRow, dict(), row_context)
    yield event, item
    if event is ENDEVENT and cell_flags.popleft() & ROW_CLOSE:
        yield ENDEVENT, (TableRow, dict(), row_context)
| |
|
| |
|
def tokenize_text_by_lang(event_prefixed_mac):
    ''' Split each Text element into per-language Text elements.

    The text of every Text STARTEVENT is tokenized with
    `tokenize_unicode_by_lang`; each (lang, text) token becomes its own
    Text element carrying the original ``charshape_id`` plus ``lang``,
    emitted as a STARTEVENT/ENDEVENT pair. The original Text events are
    not emitted. Every other event passes through unchanged.

    (The previous docstring had been copied from `restructure_tablebody`
    and described table rows instead.)
    '''
    for event, item in event_prefixed_mac:
        (model, attributes, context) = item
        if model is Text:
            # The original Text ENDEVENT is dropped: each token below
            # already emits its own START/END pair.
            if event is STARTEVENT:
                charshape_id = attributes['charshape_id']
                for lang, text in tokenize_unicode_by_lang(attributes['text']):
                    token = (Text, {
                        'charshape_id': charshape_id,
                        'lang': lang,
                        'text': text,
                    }, context)
                    yield STARTEVENT, token
                    yield ENDEVENT, token
        else:
            yield event, item
| |
|
| |
|
def embed_bindata(event_prefixed_mac, bindata):
    ''' Embed the content of EMBEDDING-type BinData as base64 text.

    For every BinData STARTEVENT whose storage flag is EMBEDDING, the
    stream named ``BIN<storage_id as 4 hex digits>.<ext>`` is read from
    `bindata`, base64-encoded, wrapped at 64 columns and stored in
    ``attributes['bindata']['<text>']`` (with a leading and a trailing
    newline); ``attributes['bindata']['inline']`` is set to ``'true'``.
    All events are passed through.

    :param bindata: mapping from stream name to an object whose
        ``open()`` returns a readable, closable stream
    '''
    line_width = 64
    for event, item in event_prefixed_mac:
        (model, attributes, context) = item
        if event is STARTEVENT and model is BinData:
            if attributes['flags'].storage is BinData.StorageType.EMBEDDING:
                record = attributes['bindata']
                name = ('BIN%04X' % record['storage_id']
                        + '.'
                        + record['ext'])
                bin_stream = bindata[name].open()
                try:
                    binary = bin_stream.read()
                finally:
                    bin_stream.close()
                b64 = base64.b64encode(binary).decode('ascii')
                # Slice by offset instead of repeatedly re-slicing the
                # remainder, which copied the whole tail on every line.
                lines = [b64[pos:pos + line_width]
                         for pos in range(0, len(b64), line_width)]
                record['<text>'] = '\n' + '\n'.join(lines) + '\n'
                record['inline'] = 'true'
        yield event, item
| |
|
| |
|
def prefix_binmodels_with_event(context, models):
    ''' Convert leveled binmodel records into start/end model events.

    Each record (a mapping with ``level``, ``type`` and ``content``) is
    turned into ``(level, (type, content, context))`` and the resulting
    sequence is handed to `prefix_event`.

    :param context: object attached to every item as its context
    :param models: iterable of binmodel records
    '''
    leveled_items = ((record['level'],
                      (record['type'], record['content'], context))
                     for record in models)
    return prefix_event(leveled_items)
| |
|
| |
|
def wrap_modelevents(wrapper_model, modelevents):
    ''' Enclose `modelevents` in a START/END pair for `wrapper_model`.

    :param wrapper_model: the (model, attributes, context) item used for
        both the opening and the closing event
    :param modelevents: iterable of (event, item) pairs to enclose
    '''
    yield STARTEVENT, wrapper_model
    for inner_event in modelevents:
        yield inner_event
    yield ENDEVENT, wrapper_model
| |
|
| |
|
def modelevents_to_xmlevents(modelevents):
    ''' Convert model events into XML events.

    A STARTEVENT is expanded through `startelement`; an ENDEVENT becomes
    ``(ENDEVENT, model.__name__)``. When the conversion of an event
    fails, the offending event is logged and the exception re-raised.

    :param modelevents: iterable of (event, (model, attributes, context))
    '''
    for event, (model, attributes, context) in modelevents:
        try:
            if event is STARTEVENT:
                for x in startelement(context, (model, attributes)):
                    yield x
            elif event is ENDEVENT:
                yield ENDEVENT, model.__name__
        # Not a bare ``except:``: closing this generator raises
        # GeneratorExit (a BaseException) at the yield above, which must
        # not be reported as a conversion error.
        except Exception:
            logger.error('model: %s', pformat({
                'event': event,
                'model': model,
                'attributes': attributes,
                'context': context
            }))
            raise
| |
|
| |
|
class XmlEvents(object):
    ''' XML view of a stream of model events.

    Iterating an instance yields XML events; `bytechunks`, `dump` and
    `open` serialize them.
    '''

    def __init__(self, events):
        # events: iterable of (event, (model, attributes, context))
        self.events = events

    def __iter__(self):
        return modelevents_to_xmlevents(self.events)

    def bytechunks(self, xml_declaration=True, **kwargs):
        ''' Generate the serialized XML as encoded byte chunks.

        :param xml_declaration: emit an ``<?xml ...?>`` line first
        :param xml_encoding: (keyword) output encoding, default ``utf-8``
        '''
        encoding = kwargs.get('xml_encoding', 'utf-8')
        if xml_declaration:
            declaration = '<?xml version="1.0" encoding="{}"?>\n'.format(
                encoding
            )
            yield declaration.encode(encoding)
        for chunk in xmlevents_to_bytechunks(self, encoding):
            yield chunk

    def dump(self, outfile, **kwargs):
        ''' Write the serialized XML to `outfile`, flushing if possible.

        Keyword arguments are forwarded to `bytechunks`.
        '''
        for chunk in self.bytechunks(**kwargs):
            outfile.write(chunk)
        if hasattr(outfile, 'flush'):
            outfile.flush()

    def open(self, **kwargs):
        ''' Return a temporary file holding the XML, rewound to offset 0.

        The temporary file is closed if dumping fails; the error is
        propagated.
        '''
        tmpfile = TemporaryFile()
        dumped = False
        try:
            self.dump(tmpfile, **kwargs)
            dumped = True
        finally:
            # Do not leak the temporary file when dumping fails.
            if not dumped:
                tmpfile.close()

        tmpfile.seek(0)
        return tmpfile
| |
|
| |
|
class XmlEventsMixin(object):
    ''' Mixin adding `xmlevents` to classes that provide ``events()``. '''

    def xmlevents(self, **kwargs):
        ''' Return an `XmlEvents` over ``self.events(**kwargs)``. '''
        modelevents = self.events(**kwargs)
        return XmlEvents(modelevents)
| |
|
| |
|
class ModelEventStream(binmodel.ModelStream, XmlEventsMixin):
    ''' A binmodel stream that can also be read as model events / XML. '''

    def modelevents(self, **kwargs):
        ''' Return this stream's models as start/end model events.

        The keyword arguments are forwarded to ``self.models`` and also
        serve as the context of every event; ``version`` defaults to
        ``self.version``.
        '''
        records = self.models(**kwargs)

        # The kwargs dict doubles as the event context.
        kwargs.setdefault('version', self.version)
        return prefix_binmodels_with_event(kwargs, records)

    def other_formats(self):
        ''' Extend the base formats with an ``.xml`` opener. '''
        formats = super(ModelEventStream, self).other_formats()
        formats['.xml'] = self.xmlevents().open
        return formats
| |
|
| |
|
class HwpSummaryInfo(filestructure.HwpSummaryInfo, XmlEventsMixin):
    ''' Summary information stream exposed as model events / XML. '''

    def events(self, **context):
        ''' Return the property set stream's events wrapped in a
        HwpSummaryInfo element; the keyword arguments are the context.
        '''
        generator = PropertySetStreamModelEventsGenerator(context)
        stream_events = generator.generateModelEvents(self.propertySetStream)
        wrapper = (HwpSummaryInfo, {}, context)
        return wrap_modelevents(wrapper, stream_events)
| |
|
| |
|
class PropertySetStreamModelEventsGenerator(object):
    ''' Generate model events describing an OLE property set stream.

    The stream is rendered as nested PropertySetStream / PropertySet /
    Property / DictionaryEntry elements, all sharing one context.
    '''

    def __init__(self, context):
        # context: attached to every generated element
        self.context = context

    def generateModelEvents(self, stream):
        ''' Entry point: return the events for `stream`. '''
        return self.getPropertySetStreamEvents(stream)

    def getPropertySetStreamEvents(self, stream):
        ''' Return events for the stream and all of its property sets. '''
        from .msoleprops import PropertySetStream
        per_set_events = [
            self.getPropertySetEvents(propertyset)
            for propertyset in stream.propertysets
        ]
        children = chain.from_iterable(per_set_events)

        content = dict(
            byte_order='{:04x}'.format(stream.byteOrder),
            version=str(stream.version),
            system_identifier='{:08x}'.format(stream.systemIdentifier),
            clsid=str(stream.clsid)
        )
        wrapper = (PropertySetStream, content, self.context)
        return wrap_modelevents(wrapper, children)

    def getPropertySetEvents(self, propertyset):
        ''' Return events for one property set; its properties are
        ordered by their descriptor offset.
        '''
        from .msoleprops import PropertySet
        ordered = sorted(propertyset.properties,
                         key=lambda prop: prop.desc.offset)
        per_property_events = [self.getPropertyEvents(prop)
                               for prop in ordered]
        children = chain.from_iterable(per_property_events)

        content = dict(
            fmtid=propertyset.fmtid,
            offset=propertyset.desc.offset,
        )
        wrapper = (PropertySet, content, self.context)
        return wrap_modelevents(wrapper, children)

    def getPropertyEvents(self, property):
        ''' Return events for one property.

        The dictionary property is rendered as child DictionaryEntry
        elements; every other property carries its value as the
        ``value`` attribute.
        '''
        from .msoleprops import PID_DICTIONARY
        from .msoleprops import Property
        content = dict(
            id=property.desc.id,
            offset=property.desc.offset,
        )
        if property.idLabel is not None:
            content['id_label'] = property.idLabel
        if property.type is not None:
            content['type'] = str(property.type.vt_type.__name__)
            content['type_code'] = '0x{:04x}'.format(property.type.code)
        if property.id != PID_DICTIONARY.id:
            children = ()
            content['value'] = property.value
        else:
            children = self.getDictionaryEvents(property.value)
        wrapper = (Property, content, self.context)
        return wrap_modelevents(wrapper, children)

    def getDictionaryEvents(self, dictionary):
        ''' Return the events of all entries of `dictionary`. '''
        per_entry_events = [self.getDictionaryEntryEvents(entry)
                            for entry in dictionary.entries]
        return chain.from_iterable(per_entry_events)

    def getDictionaryEntryEvents(self, entry):
        ''' Return a childless DictionaryEntry element for `entry`. '''
        from .msoleprops import DictionaryEntry
        content = dict(
            id=entry.id,
            name=entry.name,
        )
        wrapper = (DictionaryEntry, content, self.context)
        return wrap_modelevents(wrapper, ())
| |
|
| |
|
class DocInfo(ModelEventStream):
    ''' DocInfo stream exposed as model events wrapped in a DocInfo
    element.
    '''

    def events(self, **kwargs):
        ''' Return the DocInfo events.

        When ``embedbin`` is among the keyword arguments, its value is
        passed to `embed_bindata` as the bindata source.
        '''
        wrapper = (DocInfo, dict(), dict())
        stream = self.modelevents(**kwargs)
        if 'embedbin' in kwargs:
            stream = embed_bindata(stream, kwargs['embedbin'])
        return wrap_modelevents(wrapper, stream)
| |
|
| |
|
class Section(ModelEventStream):
    ''' A body text section exposed as restructured model events. '''

    def events(self, **kwargs):
        ''' Return the section's events after all restructuring passes.

        The passes are applied in a fixed order; each one wraps the
        stream produced by the previous one. ``section_idx`` (keyword,
        optional) is forwarded to `wrap_section` as the section id.
        '''
        stream = self.modelevents(**kwargs)

        simple_passes = (
            make_texts_linesegmented_and_charshaped,
            make_extended_controls_inline,
            match_field_start_end,
            make_paragraphs_children_of_listheader,
        )
        for apply_pass in simple_passes:
            stream = apply_pass(stream)
        # Same pass again, this time nesting TableCells under TableBody.
        stream = make_paragraphs_children_of_listheader(stream, TableBody,
                                                        TableCell)
        stream = restructure_tablebody(stream)
        stream = tokenize_text_by_lang(stream)

        stream = wrap_section(stream, kwargs.get('section_idx'))
        return wrap_columns(stream)
| |
|
| |
|
class Sections(binmodel.Sections, XmlEventsMixin):
    ''' All body text sections exposed as model events / XML. '''

    section_class = Section

    def events(self, **kwargs):
        ''' Return the events of every section, in index order, wrapped
        in a single BodyText element.

        ``section_idx`` is set in the keyword arguments for each section
        before its events are requested.
        '''
        per_section = []
        for idx in self.section_indexes():
            kwargs['section_idx'] = idx
            per_section.append(self.section(idx).events(**kwargs))

        class BodyText(object):
            pass

        wrapper = (BodyText, dict(), dict())
        return wrap_modelevents(wrapper, chain.from_iterable(per_section))

    def other_formats(self):
        ''' Extend the base formats with an ``.xml`` opener. '''
        formats = super(Sections, self).other_formats()
        formats['.xml'] = self.xmlevents().open
        return formats
| |
|
| |
|
class HwpDoc(Struct):
    ''' Root element model; declares a single ``version`` attribute. '''

    @staticmethod
    def attributes():
        yield VERSION, 'version'
| |
|
| |
|
class Hwp5File(binmodel.Hwp5File, XmlEventsMixin):
    ''' A HWP5 file exposed as one stream of model events / XML. '''

    summaryinfo_class = HwpSummaryInfo
    docinfo_class = DocInfo
    bodytext_class = Sections

    def events(self, **kwargs):
        ''' Return the events of the whole document.

        Summary info, DocInfo and body text events are concatenated in
        that order, wrapped in a HwpDoc element carrying the header
        version, and finally numbered by `give_elements_unique_id`.

        :param embedbin: (keyword) when true and the file has a
            ``BinData`` entry, that entry is passed on to the parts as
            ``embedbin``; otherwise the keyword is removed
        '''
        if kwargs.get('embedbin') and 'BinData' in self:
            kwargs['embedbin'] = self['BinData']
        else:
            kwargs.pop('embedbin', None)

        parts = chain(self.summaryinfo.events(**kwargs),
                      self.docinfo.events(**kwargs),
                      self.text.events(**kwargs))

        wrapper = (HwpDoc, dict(version=self.header.version), dict())
        document = wrap_modelevents(wrapper, parts)

        # Number the elements as the last step over the whole document.
        return give_elements_unique_id(document)
| |
|