#
# File:: %RS_TOOLSLIB%/pipeline/content/parser.rb
# Description:: Content system parser implementation.
#
# Author:: David Muir
# Date:: 12 June 2008
#

#-----------------------------------------------------------------------------
# Uses
#-----------------------------------------------------------------------------
require 'pipeline/config/projects'
require 'pipeline/content/treecore'
require 'pipeline/os/path'
require 'pipeline/resourcing/path'
require 'pipeline/util/string'
require 'rexml/document'
include REXML

#-----------------------------------------------------------------------------
# Implementation
#-----------------------------------------------------------------------------
module Pipeline
module Content

  #
  # == Description
  # This is the content-tree parser class.  It parses the content XML
  # file(s) and constructs Content::Base-derived nodes that represent the
  # content-tree in-memory.
  #
  # Environment handling: every method that pushes a frame onto the
  # environment stack pops it again (even when an exception is raised), so
  # the environment handed to #parse_xml is returned to its owner with the
  # same depth it had on entry.
  #
  class Parser

    # Raised when the content XML is well-formed XML but is not valid
    # content markup (unknown node name, missing type attribute, ...).
    #
    # NOTE(review): this derives from Exception rather than StandardError,
    # so a bare `rescue` will not catch it.  Left unchanged because code
    # outside this file may depend on that.
    class ParseError < Exception; end

    attr_reader :project  # Initialised project
    attr_reader :branch   # Initialised branch name
    attr_reader :target   # Current target

    #
    # Parser constructor.  Pass in a project to initialise the parser for;
    # as the content XML tree is parsed the project is required for path
    # resolution and target resolution.
    #
    # project:: project object the parser works on.
    # branch::  branch name; the project's default branch is used when nil.
    #
    def initialize(project, branch = nil)
      @project = project
      @branch = branch.nil? ? project.default_branch : branch
      @target = @project.branches[@branch].ind_target
      @content_lookup_cache = nil

      load_custom_content_types
    end

    #
    # Parse an XML document into a Content Tree.
    #
    # filename:: path of the XML file to read.
    # env::      environment used for substitutions; a new Environment is
    #            created when nil.  A frame is pushed for the duration of
    #            the parse and popped before returning.
    # filter::   optional callable invoked as filter.call(filename, doc);
    #            its return value replaces the parsed document.
    #
    # Returns the root content node, or nil when parsing failed.  Failures
    # are reported and logged here; they are not propagated to the caller.
    #
    def parse_xml(filename, env = nil, filter = nil)
      tree = nil
      begin
        env = Environment.new if env.nil?
        env.push
        @project.branches[@branch].fill_env(env)

        Parser.log.info("Parse XML: #{filename}")
        ::File.open(filename) do |sourcefile|
          sourcedoc = Document.new(sourcefile)
          sourcedoc = filter.call(filename, sourcedoc) unless filter.nil?
          tree = parse_node(sourcedoc.root, '', env)
        end
      rescue REXML::ParseException => ex
        # Handle REXML parse exceptions.
        report_parse_failure(ex, "XML Parse Error parsing #{filename}: #{ex.message}")
      rescue ParseError => ex
        # Handle our custom parse error exceptions.
        report_parse_failure(ex, "Content XML formatting error parsing #{filename}: #{ex.message}")
      rescue Exception => ex
        # Handle other exceptions.
        # NOTE(review): this also swallows Interrupt/SystemExit; kept as the
        # original best-effort behaviour.
        report_parse_failure(ex, "Unhandled exception: #{ex.message}")
      ensure
        # Ensure we pop the environment stack so we don't trash
        # it for its owner.
        env.pop unless env.nil?
      end
      tree
    end

    # Return class-wide Log object (created on first use).
    def Parser::log
      @@log = Log.new('contentparser') if @@log.nil?
      @@log
    end

    #---------------------------------------------------------------------
    # Private Methods
    #---------------------------------------------------------------------
    private

    #
    # Report a failure caught by #parse_xml: hand it to print_exception, log
    # the message as an error and log each backtrace line as info.
    #
    def report_parse_failure(ex, message)
      print_exception(ex, message)
      Parser.log.error(message)
      ex.backtrace.each { |line| Parser.log.info(line) }
    end

    #
    # Run the given block inside a fresh environment frame.  The frame is
    # popped when the block finishes, including when it raises or returns
    # early.  Returns the block's value.
    #
    def with_env_scope(env)
      env.push
      begin
        yield
      ensure
        env.pop
      end
    end

    #
    # Find all Ruby files (prefixed with 'content_') in the toolslib
    # 'pipeline/content' directory and load them into our current context.
    # Any child classes of Base are then considered content classes.  They
    # must have their XML_CONTENT_TYPE constant well defined.
    #
    def load_custom_content_types
      Parser.log.info("Loading custom content types:")
      path = OS::Path.combine(Config::instance.toolslib, 'pipeline', 'content')
      Dir.foreach(path) do |basename|
        next unless basename.starts_with('content_')

        filename = OS::Path.combine('pipeline', 'content', basename)
        Parser.log.info("Loading #{filename}")
        require(filename)
      end

      Base.registered_content_types.each do |t|
        Parser.log.info("Registered handler for \"#{t::XML_CONTENT_TYPE}\":: #{t}")
      end
    end

    #
    # Return the registered content class whose XML_CONTENT_TYPE equals
    # content_type, or nil when no class is registered for it.
    #
    def find_handler_class(content_type)
      Base.registered_content_types.each do |t|
        return t if t::XML_CONTENT_TYPE == content_type
      end
      nil
    end

    #
    # Method to parse a single XML node irrespective of type.
    #
    # xml_node:: REXML element named 'content' or 'in'.
    # path::     current source path, inherited from the parent node.
    # env::      environment used for substitutions.
    #
    # Returns the constructed content (or input) node.
    # Raises ParseError for any other node name, and for 'in' nodes that
    # have child elements.
    #
    def parse_node(xml_node, path, env)
      case xml_node.name
      when 'content'
        # The frame is popped on every exit path, including the early
        # return for templates.
        with_env_scope(env) { parse_content_element(xml_node, path, env) }
      when 'in'
        input = parse_input_node(xml_node, path, env)
        # Input nodes cannot have any child elements.
        if xml_node.elements.size > 0
          raise ParseError, "Invalid input node, input nodes cannot have child XML nodes."
        end
        input
      else
        # Unrecognised node name so we abort immediately.
        raise ParseError, "Unrecognised XML node name: #{xml_node}"
      end
    end

    #
    # Build the content node for a 'content' element and attach its child
    # content nodes and inputs.  The caller owns the environment frame.
    #
    def parse_content_element(xml_node, path, env)
      content = parse_content_node(xml_node, path, env)
      content_path = content.path
      # Children inherit this node's path when it has one.
      srcpath = (content_path.nil? || '' == content_path) ? path : content_path

      # Don't recurse through children for templates, as the template
      # parsing itself needs to handle that.  Otherwise we recurse
      # forever (which is a long time).  This is why we have the
      # Groups masqueraded as Templates.
      return content if content.is_a?(Pipeline::Content::Template)

      # Content nodes can have input nodes or sometimes child
      # content nodes.  Any other child element is ignored.
      xml_node.each_element do |child_xml_node|
        case child_xml_node.name
        when 'content'
          # We can only add child content nodes if our
          # higher-level content node supports children...
          unless content.respond_to?(:children)
            raise ParseError, "Higher-level content node does not support children. XML error? Content: #{content.class}."
          end
          content.add_child(parse_node(child_xml_node, srcpath, env))
        when 'in'
          # Straightforward: we parse it and link it as an input.
          input = parse_input_node(child_xml_node, srcpath, env)
          if input.is_a?(Content::Base)
            content.add_input(input)
            input.add_output(content)
          end
        end
      end
      content.post_load_input
      content.post_load_output

      content
    end

    #
    # Method to parse a single XML content node.
    #
    # Template types ('template:...') are expanded here; every other type
    # is handed to the from_xml class method of the registered content
    # class.  When nothing produces a node an Unknown node is returned.
    #
    # Raises ArgumentError when xml_node is not a 'content' element and
    # ParseError when it has no 'type' attribute.
    #
    def parse_content_node(xml_node, path, env)
      raise ArgumentError, 'Invalid content node, name incorrect.' unless 'content' == xml_node.name

      contenttype = nil
      contenttype = env.subst(xml_node.attributes['type']) unless xml_node.attributes['type'].nil?
      if contenttype.nil?
        raise ParseError, "XML Parse Error in content XML. Content node has no type defined, XML: #{xml_node}."
      end

      contentnode =
        case contenttype
        #-------------------------------------------------------------
        # Template Content Nodes
        #-------------------------------------------------------------
        when 'template:content:each:recursive'
          build_content_each_template(xml_node, path, env)
        when 'template:file:each'
          build_file_each_template(xml_node, path, env)
        when 'template:file:each:regexp'
          build_file_each_regexp_template(xml_node, path, env)
        when 'template:path:each'
          build_path_each_template(xml_node, path, env)
        when 'template:path:each:recursive'
          build_path_each_recursive_template(xml_node, path, env)
        when 'template:path:each:recursive:pack'
          build_path_each_recursive_pack_template(xml_node, path, env)
        when 'template:target:each'
          # Loop through all project's enabled targets.
          node = Template.new('template:target:each')
          traverse_targets(node, xml_node, path, env)
          node
        else
          # DEFAULT CASE
          # Find the class associated with the content type and let its
          # from_xml class method construct the node.  Without a handler
          # we fall through to the Unknown node below.
          handler_class = find_handler_class(contenttype)
          handler_class.nil? ? nil : handler_class::from_xml(xml_node, path, env, @target)
        end

      # Mop up for when we don't have a handler class for the
      # type or something screwed up earlier.
      if contentnode.nil?
        contentnode = Unknown.new("No type handler defined for type:#{contenttype}.", xml_node)
        Parser.log.warn("Creating Unknown content node: #{contentnode} at #{path} xml is: #{xml_node}")
      end
      contentnode
    end

    #
    # Substitute path in the environment of the project's independent
    # target and return the result.
    #
    def resolve_search_path(path)
      search_path = ''
      @project.ind_target.in_env do |e|
        search_path = e.subst(path)
      end
      search_path
    end

    #
    # Resolve each child element of xml_node against env, parse it and add
    # the resulting node to contentnode.
    #
    def expand_template_children(contentnode, xml_node, path, env)
      xml_node.each_element do |xml_child_node|
        resolved_xml_child_node = resolve_template(xml_child_node, env)
        contentnode.add_child(parse_node(resolved_xml_child_node, path, env))
      end
    end

    #
    # Return the last child element of xml_node (nil when it has none).
    # The recursive path templates use it as their template element.
    #
    def last_child_element(xml_node)
      templateelement = nil
      xml_node.each_element do |xml_child_node|
        templateelement = xml_child_node
      end
      templateelement
    end

    #
    # 'template:content:each:recursive': apply the template to every
    # content node selected by the 'script' attribute, optionally limited
    # to the group(s) named by the 'group' attribute.
    #
    def build_content_each_template(xml_node, path, env)
      script = xml_node.attributes['script']
      groupname = xml_node.attributes['group']

      found_content = []
      if groupname.nil?
        # No group specified so we search the entire content tree.
        found_content = @project.content.find_by_script(script)
      else
        # Group specified so we first find our group(s) and then
        # search each of them.
        @project.content.find_groups(groupname).each do |group|
          found_content += group.find_by_script(script)
        end
      end

      contentnode = Template.new('template:content:each:recursive')
      traverse_content_set(contentnode, found_content, xml_node, path, env)
      contentnode
    end

    #
    # 'template:file:each': loop through files in path whose extension is
    # given by the 'search' XML attribute.  Ignores directories - see
    # 'template:path:each'.  Each file exposes $(search), $(found) and
    # $(ext) to the template children.
    #
    def build_file_each_template(xml_node, path, env)
      search_path = resolve_search_path(path)
      contentnode = Template.new('template:file:each')
      return contentnode unless ::File.directory?(search_path)

      search = xml_node.attributes['search']
      file_list = OS::FindEx.find_files(search_path + "/*." + search, true)
      file_list.each do |f|
        with_env_scope(env) do
          env.add('search', search)
          env.add('found', OS::Path.get_basename(f))
          env.add('ext', OS::Path.get_extension(f))
          expand_template_children(contentnode, xml_node, path, env)
        end
      end
      contentnode
    end

    #
    # 'template:file:each:regexp': loop through entries in path whose name
    # matches the 'regexp' XML attribute.  Each match exposes $(regexp) and
    # $(found) to the template children.
    #
    def build_file_each_regexp_template(xml_node, path, env)
      search_path = resolve_search_path(path)
      contentnode = Template.new('template:file:each:regexp')
      regexp = Regexp.new(xml_node.attributes['regexp'])
      return contentnode unless ::File.directory?(search_path)

      file_list = []
      Dir.open(search_path) do |dir|
        dir.each do |f|
          next unless f =~ regexp

          file_list << OS::Path.get_basename(f)
        end
      end

      file_list.each do |f|
        # Scoped so the frame is popped again after each file.
        with_env_scope(env) do
          env.add('regexp', regexp.to_s)
          env.add('found', f)
          expand_template_children(contentnode, xml_node, path, env)
        end
      end
      contentnode
    end

    #
    # 'template:path:each': loop through directories in path.  Ignores
    # files - see 'template:file:each'.  Each directory exposes $(found)
    # to the template children.
    #
    # NOTE(review): the original computed OS::Path.combine(path, directory)
    # here but never used it; children are parsed with the unmodified path.
    # Confirm whether the combined path was meant to be passed down.
    #
    def build_path_each_template(xml_node, path, env)
      contentnode = Template.new('template:path:each')
      search_path = resolve_search_path(path)

      OS::FindEx.find_dirs(search_path, true).each do |directory|
        with_env_scope(env) do
          env.add("found", directory)
          expand_template_children(contentnode, xml_node, path, env)
        end
      end
      contentnode
    end

    #
    # 'template:path:each:recursive': loop through directories in path
    # recursively.  Ignores files - see 'template:file:each'; for the
    # non-recursive version see 'template:path:each'.  The 'path' attribute
    # is used when the inherited path is empty; 'group' ("true") wraps each
    # directory's nodes in a Group; 'search' is the file extension.
    #
    def build_path_each_recursive_template(xml_node, path, env)
      contentnode = Template.new('template:path:each:recursive')
      path = xml_node.attributes["path"] if path == ""
      group = ('true' == xml_node.attributes["group"])
      search_ext = xml_node.attributes["search"]
      search_path = resolve_search_path(path)

      templateelement = last_child_element(xml_node)
      traverse_directories(search_path, search_ext, "", contentnode, env, templateelement, group)
      contentnode
    end

    #
    # 'template:path:each:recursive:pack': like the recursive path template
    # but collects each directory's files as inputs of a node built from
    # the content class registered for the 'packtype' attribute.  Nothing
    # is traversed when no class is registered for that type.
    #
    def build_path_each_recursive_pack_template(xml_node, path, env)
      # NOTE(review): the node is named after the non-pack template type,
      # exactly as in the original code.
      contentnode = Template.new('template:path:each:recursive')
      path = xml_node.attributes["path"] if path == ""
      pack_type = xml_node.attributes["packtype"]
      search_ext = xml_node.attributes["search"]
      search_path = resolve_search_path(path)

      templateelement = last_child_element(xml_node)
      handler_class = find_handler_class(pack_type)
      unless handler_class.nil?
        traverse_directories_pack(search_path, search_ext, "", contentnode, env, templateelement, handler_class)
      end
      contentnode
    end

    #
    # Method to parse a single XML input node.
    #
    # The 'name', 'type' and optional 'group' attributes identify an
    # existing content node.  The content lookup cache is consulted first
    # and the project content tree second.  An Unresolved node is returned
    # when neither finds a match.
    #
    # Raises ArgumentError when xml_node is not an 'in' element.
    #
    def parse_input_node(xml_node, path, env)
      raise ArgumentError, 'Invalid input node, name incorrect.' unless 'in' == xml_node.name

      inputname = env.subst(xml_node.attributes['name'])
      inputtype = env.subst(xml_node.attributes['type'])
      inputgroup = nil
      inputgroup = env.subst(xml_node.attributes['group']) unless xml_node.attributes['group'].nil?

      # Prior to actually doing a full content hierarchy search, we have
      # a cache local object that we can check first.
      input_node = nil
      cache = @content_lookup_cache
      if cache.is_a?(Pipeline::Content::Base)
        # The cached node itself is the input.
        return cache if cache.name == inputname && cache.xml_type == inputtype

        # Otherwise try finding it below our cached node.
        if cache.respond_to?(:find_first)
          start = Time.now()
          print "Trying content_lookup_cache..."
          input_node = find_input_in(cache, inputname, inputtype, inputgroup)
          print " found #{Time.now() - start}.\n" unless input_node.nil?
        end
      end

      # Only fall back to regular resolving when the cache did not
      # produce a node.
      input_node = find_input_in(@project.content, inputname, inputtype, inputgroup) if input_node.nil?

      # Mop up for when we don't resolve the input node, like an odd
      # configuration, unsupported type or an XML/parser bug.
      if input_node.nil?
        input_node = Unresolved.new(inputname, inputtype, inputgroup)
        Parser.log.warn("Creating Unresolved input node: #{input_node}")
      end
      input_node
    end

    #
    # Look up a node by name and type below root.  When group is a
    # non-empty string the search is limited to the first group of that
    # name.  Returns nil when the group or the node cannot be found.
    #
    def find_input_in(root, name, type, group)
      return root.find_first(name, type) if group.nil? || '' == group

      input_group = root.find_first_group(group)
      input_group.nil? ? nil : input_group.find_first(name, type)
    end

    #
    # Resolve a template and then turn it into content objects.
    #
    def parse_template_node(template_node, path, env)
      template_xml = resolve_template(template_node, env)
      parse_node(template_xml, path, env)
    end

    #---------------------------------------------------------------------
    # Template XML Generation Methods
    #---------------------------------------------------------------------

    #
    # Used by resolve_template: substitute environment variables in the
    # attribute values of xml_node itself.
    #
    # Child nodes are deliberately left alone.  We don't want to resolve
    # the entire template because then templates that reuse other
    # environment variables won't get the correct value (e.g. $(found) in
    # template:file:each and template:path:each).
    #
    def resolve_template_inner(xml_node, env)
      return unless xml_node.respond_to?(:attributes)

      xml_node.attributes.each_value do |a|
        xml_node.attributes[a.name] = env.subst(a.value)
      end
    end

    #
    # Turn a template node into real XML nodes with the current environment.
    # The template node itself is left untouched; a resolved copy of it is
    # returned.
    #
    def resolve_template(template_node, env)
      # Construct a temporary XML Document from a string representation of
      # our template node, so that the substitutions below are applied to
      # a copy rather than to the template.
      output = ""
      fmt = REXML::Formatters::Default.new
      fmt.write(template_node, output)
      temp_doc = Document.new(output)

      # Do any environment substitutions in our XML attributes,
      # and return the new XML.
      resolve_template_inner(temp_doc.root, env)
      temp_doc.root
    end

    #---------------------------------------------------------------------
    # Template Traverse Methods : Content Tree
    #---------------------------------------------------------------------

    #
    # This method creates content nodes for a single content object by
    # applying the XML nodes within the template_xml_node tree, and then
    # repeats that for each of the content object's children so that the
    # hierarchy is maintained.
    #
    # \param parent             node the new content nodes are added to
    # \param content            content object the template is applied to
    # \param template_xml_node  XML node whose child elements are the template
    # \param path               current source path
    # \param env                environment used for substitutions
    # \param indent             recursion depth (not otherwise used here)
    #
    def traverse_content(parent, content, template_xml_node, path, env, indent = 0)
      # Walk through each XML Node template element, generating a new
      # content element as appropriate.
      template_xml_node.each_element do |template_node|
        with_env_scope(env) do
          content.fill_env(env)
          newobj = parse_node(template_node, path, env)
          parent.add_child(newobj)

          # DHM FIXME
          # Substitute any $(export) strings in our paths if we
          # are processing a target's XML.  Also massage any extensions,
          # if applicable.
          if @target != @project.branches[@branch].ind_target
            if content.respond_to?(:path)
              newobj.path = content.path.gsub('$(export)', '$(target)')
            end
            if content.respond_to?(:extension)
              newobj.extension = Resourcing::convert_independent_extension_to_platform(content.extension, @target)
            end
          end

          # If our content to be substituted has children then we have to
          # substitute them also to maintain hierarchy.  Each child becomes
          # the lookup cache while it is being processed.
          saved_cache = @content_lookup_cache
          begin
            if content.respond_to?(:children)
              content.children.each do |child|
                @content_lookup_cache = child
                traverse_content(newobj, child, template_xml_node, path, env, indent + 1)
              end
            end
          ensure
            @content_lookup_cache = saved_cache
          end
        end
      end
    end

    #
    # This method creates content nodes for each content object in
    # content_set by applying the XML Nodes within the template_xml_node
    # tree (i.e. the XML nodes below the template in the original XML
    # document).  The nodes generated for each content object are collected
    # in a Group that is added to parent.
    #
    # The content lookup cache is set to each content object while it is
    # processed and restored to its previous value afterwards.
    #
    # \param parent
    # \param content_set
    # \param template_xml_node
    # \param path
    # \param env
    #
    # Raises TypeError when parent cannot contain children or content_set
    # is not an Array.
    #
    def traverse_content_set(parent, content_set, template_xml_node, path, env)
      raise TypeError, 'parent must be able to contain children.' unless parent.respond_to?(:children)
      raise TypeError, 'subst_content_set must be an array.' unless content_set.is_a?(Array)

      saved_cache = @content_lookup_cache
      begin
        # Walk through each content object in our set, generating a new
        # substituted content object using environment substitutions where
        # appropriate.
        content_set.each do |content|
          @content_lookup_cache = content
          content_list = Group.new('traverse_content_set')
          parent.add_child(content_list)
          traverse_content(content_list, content, template_xml_node, path, env)
        end
      ensure
        @content_lookup_cache = saved_cache
      end
    end

    #---------------------------------------------------------------------
    # Template Traverse Methods : Targets
    #---------------------------------------------------------------------

    #
    # Traverse all enabled project targets for the specified sub-XML-node
    # tree creating content nodes for that target as we go.
    #
    # This essentially replicates a content node tree for a specific
    # target, substituting our paths as required (although additional work
    # will be required at conversion time, i.e. replacing file extension
    # parameters).
    #
    # The parser's current target is switched to each target in turn and
    # restored afterwards, even when parsing raises.
    #
    def traverse_targets(parent, xml_node, path, env)
      store_target = @target
      begin
        @project.branches[@branch].targets.each_value do |target|
          # Ignore targets if they are not user-enabled.
          next unless target.enabled

          with_env_scope(env) do
            @target = target
            @target.fill_env(env)

            xml_node.each_element do |xml_child_node|
              newobj = parse_node(xml_child_node, path, env)
              # Substitute any $(export) strings in our paths.
              if newobj.respond_to?(:path)
                newobj.path = newobj.path.gsub('$(export)', '$(target)')
              end
              parent.add_child(newobj)
            end
          end
        end
      ensure
        @target = store_target
      end
    end

    #---------------------------------------------------------------------
    # Template Traverse Methods : Filesystem
    #---------------------------------------------------------------------

    #
    # Traverse an on-disk directory structure.
    #
    # For every directory below root + relative the files with extension
    # ext are turned into content nodes through templatenode, with $(name)
    # set to the file and $(path) to the directory's relative path.  When
    # group is true the nodes of each directory that has matching files are
    # wrapped in a Group named after the relative path; otherwise they are
    # added to addobj directly.  Sub-directories are processed recursively.
    #
    def traverse_directories(root, ext, relative, addobj, env, templatenode, group)
      searchpath = root + relative

      OS::FindEx.find_dirs(searchpath, true).each do |d|
        new_relative = relative.dup + "/" + d
        currdir = root + "/" + new_relative
        filelist = OS::FindEx.find_files(currdir + "/*." + ext, true)

        grouped = (group == true && filelist.size > 0)
        groupcontent = grouped ? Group.new(new_relative) : nil

        filelist.each do |f|
          with_env_scope(env) do
            env.add('name', f)
            env.add('path', new_relative)
            newobj = parse_template_node(templatenode, searchpath, env)
            if group == true
              groupcontent.add_child(newobj)
            else
              addobj.add_child(newobj)
            end
          end
        end

        addobj.add_child(groupcontent) if grouped
        traverse_directories(root, ext, new_relative, addobj, env, templatenode, group)
      end
    end

    #
    # Traverse an on-disk directory structure building pack file content.
    #
    # For every directory below root + relative that contains files with
    # extension ext, a handler_class node is created and each file is
    # parsed through templatenode (with $(name) set to the file without its
    # extension and $(path) to the relative path) and added to it as an
    # input.  Sub-directories are processed recursively.
    #
    def traverse_directories_pack(root, ext, relative, addobj, env, templatenode, handler_class)
      searchpath = root + relative

      OS::FindEx.find_dirs(searchpath, true).each do |d|
        new_relative = relative.dup + "/" + d
        currdir = root + "/" + new_relative
        filelist = OS::FindEx.find_files(currdir + "/*." + ext, true)

        has_files = filelist.size > 0
        packcontent = nil
        packcontent = handler_class.new(new_relative, project.netstream + "/anim/", @target) if has_files

        filelist.each do |f|
          with_env_scope(env) do
            env.add('name', OS::Path::remove_extension(f))
            env.add('path', new_relative)
            newobj = parse_template_node(templatenode, root + new_relative, env)
            packcontent.add_input(newobj)
          end
        end

        addobj.add_child(packcontent) if has_files
        traverse_directories_pack(root, ext, new_relative, addobj, env, templatenode, handler_class)
      end
    end

    #---------------------------------------------------------------------
    # Private Variables
    #---------------------------------------------------------------------
    private

    @@log = nil
  end

end # Content module
end # Pipeline module

# %RS_TOOLSLIB%/pipeline/content/parser.rb