On Sun, 13 May 2007 22:46:15 -0700
Mike Mattie <[EMAIL PROTECTED]> wrote:

> Hello,
> 

[snip]

Here is a revised version of library.c . still not at the compile stage but I 
think that the
implementation is now verifiable to the search algorithm by review. This is 
pretty close to what
I would call a final on the level of algorithms, and abstractions. Still about 
half the
file to self review and assert check.

my current version is a little over >200 lines longer than the HEAD 
implementation before
the path utilities are hoisted, and the path table is enumerated else-where.

This is more of a shift of current/future complexity towards a single 
implementation 
point than a bulk addition of complexity; when the enhanced diagnostics, 
new functionality (HLL name-spaces), API insulation etc is considered. At least 
I
think so :) 
 
Some of the unorthodox commenting styles are for my own review of the code. I 
plan
on sanitizing those from the file before RT time.

wc -l {HEAD,mine}/src/library.c
  564 HEAD/src/library.c
  781 mine/src/library.c

> Cheers,
> Mike Mattie - [EMAIL PROTECTED]
> 
> 
> 
> 
/*
Copyright (C) 2004-2007, The Perl Foundation.
$Id: library.c 18482 2007-05-09 11:16:10Z paultcochrane $

=head1 NAME

src/library.c - Interface to Parrot's bytecode library

=head1 DESCRIPTION

This file contains a C function to access parrot's bytecode library functions.

=head2 Functions

=over 4

=cut

*/

#include <stdarg.h>
#include <assert.h>

#include "parrot/parrot.h"
#include "parrot/warnings.h"
#include "parrot/library.h"
#include "parrot/path.h"

#include "library.str"


/* used internally by diagnostics , ASCII text string names for the loader.
 * TODO: internationalization. */
static const char* const loader_names[] = {
    'ARCH',
    'BYTECODE',
    'INCLUDE',
    'SRC'
};

/* create a loader table , indexed by the three loader paths enumerated
 * by enum_runtime_ft in parrot/include/library.h. Each namespace has
 * a path/extension search table for a shared object loader, a byte-code
 * loader, and a source-code loader.
 *
 * for a shared object loader this is a little wastefull, but the tables
 * are not duplicated. If a namespace does not define a path/extension
 * search table, the "parrot" table will be used. */

static PMC*
create_loader_table(Interp* interp)
{
    PMC *table;

    table = pmc_new(interp, enum_class_FixedPMCArray);
    VTABLE_set_integer_native(interp, table,
                              PARROT_RUNTIME_FT_SIZE );

    return table;
}

/* return an existing load-table , or create one and attach it
 * to the namespace. */
static PMC*
get_load_table_for_populate(Interp* interp,
                            PMC* lib_paths, STRING* ns)
{
    PMC *table;

    if ( VTABLE_exists_keyed_str(interp, lib_paths, ns) )
        return VTABLE_get_pmc_keyed_str(interp, lib_paths, ns );

    table = create_loader_table(interp);

    VTABLE_set_pmc_keyed_str(interp, lib_paths,
                             ns,
                             table);
    return table;
}

/* create a search space. Loaders will want to iterate through a
 * path space, and an extension space. These are enumerated
 * in enum_search_space. */

typedef enum {
    SEARCH_TABLE_PATH = 0,
    SEARCH_TABLE_EXT,
    SEARCH_TABLE_SIZE
} enum_search_space;

static PMC*
create_search_table(Interp* interp)
{
    PMC *table;

    table = pmc_new(interp, enum_class_FixedPMCArray);
    VTABLE_set_integer_native(interp, table, SEARCH_TABLE_SIZE);

    return table;
}

static PMC*
get_search_table_for_populate(Interp* interp,
                              PMC* load_table, int loader )
{
    PMC *table;

    if ( VTABLE_exists_keyed_int( interp, load_table, loader ) )
        return VTABLE_get_pmc_keyed_int(interp, load_table, loader );

    table = create_search_table(interp);
    VTABLE_set_pmc_keyed_int(interp, load_table,
                             loader,
                             table);

    return table;
}

/* A search space is a simple dynamic array, or list of
 * name varaiations (path or extension) to try. */

static PMC*
create_search_space(Interp* interp) {
    return pmc_new(interp, enum_class_ResizableStringArray);
}

static PMC*
get_search_space_for_populate(Interp* interp,
                              PMC* search_table , enum_search_space search_space)
{
    PMC *new_table;

    if ( VTABLE_exists_keyed_int( interp, search_table , search_space ) )
        return VTABLE_get_pmc_keyed_int(interp, search_table, search_space );

    new_table = create_search_space(interp);
    VTABLE_set_pmc_keyed_int(interp,
                             search_table, search_space , new_table );

    return new_table;
}

static void
populate_search_space(Interp* interp,
                      /* the loader table for the namespace */
                      PMC* load_table,
                      enum_runtime_ft loader,

                      /* search space index */
                      enum_search_space search_space,

                      /* the entry to add */
                      STRING* entry)
{
    PMC *search_table, *search_list;

    search_list = get_search_space_for_populate(interp,
                                                get_search_table_for_populate(interp,
                                                                              load_table,loader),
                                                search_space);

    VTABLE_push_string(interp, search_list, entry);
}

/* load_prefer is a toggle to prefer either the most low level form of a module
 * (compiled) or the highest level form of a module.
 *
 * Users will typically want the compiled versions. This is also the perl5
 * behavior as well.
 *
 * Users wanting a more dynamic interaction can export PARROT_PREFER_SOURCE
 * to reverse the default behavior.
 *
 * FUTURE: If a value for PARROT_PREFER_SOURCE is honored it should be
 * a path spec of directories for which source will be loaded over compiled
 * objects.
 */

typedef enum {
    PREFER_COMPILE,
    PREFER_SOURCE
} enum_load_prefer;

static enum_load_prefer load_prefer = PREFER_COMPILE;

static int
query_load_prefer ( Interp* interp ) {
    int free_it;
    char *env;

    env = Parrot_getenv("PARROT_PREFER_SOURCE", &free_it);

    if (env) {
        if (free_it)
            mem_sys_free(env);

        return 1;
    }

    return 0;
}

static int
next_by_load_prefer (int current) {
    return current + ( PREFER_COMPILE == load_prefer )
        ? 1
        : -1;
}


static int
bound_by_load_prefer(int *lower_bound, *upper_bound)
{
    int swap;

    if ( PREFER_COMPILE == load_prefer ) {
        *lower_bound = 0;
        return;
    }

    *lower_bound = *upper_bound - 1;
    *upper_bound = -1;
}

/*

=item C<void parrot_init_library_paths(Interp *)>

 TODO: doc.

=cut

*/

#include "builtin-loader-paths.c"

void
parrot_init_library_paths(Interp *interp)
{
    PMC *iglobals, *lib_paths;

    if( query_load_prefer(interp) )
        load_prefer = PREFER_SOURCE;

    /* create lib_paths, a fixed array of hashes.

       The array indexing is for the loader types. The elements of the
       array are a hash implementing a "interpreter" name-space.
     */

    lib_paths = pmc_new(interp, enum_class_Hash);

    populate_builtin_library_paths(interp, lib_paths);

    iglobals = interp->iglobals;
    VTABLE_set_pmc_keyed_int(interp, iglobals,
                             IGLOBALS_LIB_PATHS, lib_paths);
}

static STRING* load_trace; /* used to accumulate a trace of a load
                              for diagnostics & debugging */

#define TRACE_ENABLED \( NULL != load_trace \)
#define SET_TRACE( trace ) { load_trace = trace; }

static void
append_trace (Interp* interp, const char* const format, ... )  {

    STRING* format_string = string_from_cstring( format );
    va_list args;

    va_start(args, format);

    string_append(interp, load_trace, Parrot_sprintf_s(interp, format, args);

    va_end(args);
}

#define SEARCH_TRACE_PSTRING ( format , args... )\
{\
    if( NULL != search_trace ) {\
        string_append(interp,load_trace,\
                      append_trace(interp,format, ## args ));\
    }\
}

#define SEARCH_TRACE_CSTRING ( format , args... )\
{\
    if ( NULL != load_trace ) {\
        string_append(interp, load_trace,\
                      string_printf( format , ## args );\
    }\
}


static PMC* namespace_search_path(Interp *interp,
                                  STRING* hll,
                                  enum_runtime_ft loader,
                                  enum_search_space search_space)
{
    PMC *iglobals, *lib_paths, *loader_table, *search_table, *list;

    iglobals = interp->iglobals;
    lib_paths = VTABLE_get_pmc_keyed_int(interp, iglobals, IGLOBALS_LIB_PATHS);

    /* first do the namespace lookup to find the loader table */

    if ( ! VTABLE_exists_keyed_str(interp, lib_paths, hll) )
        return NULL;

    loader_table = VTABLE_get_pmc_keyed_str(interp, lib_paths, hll );

    /* FixedPmc: look for a entry matching the loader desired */
    if ( ! VTABLE_exists_keyed_int(interp, loader_table, loader) )
        return NULL;

    search_table = VTABLE_get_pmc_keyed_int(interp, loader_table, loader );

    /* FixedPmc: check for the the PATH or EXT search table */
    if ( ! VTABLE_exists_keyed_int(interp, search_table, search_space) )
        return NULL;

    list = VTABLE_get_pmc_keyed_int(interp, search_table, search_space );
    return ( VTABLE_elements(interp, list) )
        ? list
        : NULL;
}

/* implement search space lookup , with a hard-coded default namespace
 * for fallback
 */
static PMC*
get_search_space(Interp *interp,
                 STRING* hll,
                 enum_runtime_ft loader,
                 enum_search_space search_space)
{
    PMC *table;

    if ( NULL == hll ) goto fallback;

    table = namespace_search_path(interp, hll, loader, search_space );

    if ( NULL == table ) {
      fallback:
        hll = CONST_STRING(interp, "parrot" );
        table = namespace_search_path(interp, hll, loader, search_space);
    }

    return table;
}

static PMC*                             /* can be NULL */
path_concat_permutations( STRING *path, PMC *search_space ) {
/* implements search-rule 1 */

    PMC* perm = pmc_new(interp, enum_class_ResizableStringArray);
    int i,n;

    VTABLE_push_string(interp, perm, path);

    if( !ext_search_space )
        return perm;

    n = VTABLE_elements(interp, search_space);
    bound_by_load_prefer(&i, &n);

    while( i != n ) {
        VTABLE_push_string(interp, perm,
                           string_concat(interp, path ,
                                         VTABLE_get_string_keyed_int(interp, search_space, i)));
        i = next_by_load_prefer(i);
    }

    return perm;
}

/* fs_lookup: use stat() to see if the path exists. return a platform
 * localized path if it exists, false if not. This makes the search behavior
 * first match, best by order. */

static STRING*
fs_lookup(Interp *interp, STRING* path) {
    STRING *final;
    final = parrot_path_platform_localize(interp, string_copy(interp, path));

    if (Parrot_stat_info_intval(interp, final , STAT_EXISTS)) {

#ifdef DEBUG_LIBRARY_LOCATE
        TRACE_PSTRING( "found %s\n" , final );
#endif

        return final;
    }

    return NULL;
}

static STRING*
fs_lookup_with_prefix(Interp* interp, STRING *path, *prefix ) {
/* implements search-rule 3 , search-rule 4 */

    STRING* result;

    if ( prefix ) {
        result = fs_lookup(interp, parrot_path_concat(interp, prefix , path));
        if (result)
            return result;
    }

    return fs_lookup( interp, path );
}

static STRING*                                     /* can be NULL */
fs_search_with_prefix( interp, PMC *search_space , STRING* prefix ) {
    int i,n;
    STRING* result;

    assert(search_space);

    for( i = 0 ; i < n ; i++ ) {
        result = fs_lookup_with_prefix(interp,
                                       VTABLE_get_string_keyed_int(interp, search_space, i ),
                                       prefix);
        if (result)
            return result;
    }

    return NULL;
}

/*

=item C<char* Parrot_locate_runtime_file(Interp *,
                                         const char *object_name,
                                         STRING *hll
                                         enum_runtime_ft *loader
                                         STRING* trace)>

Parrot_locate_runtime_str searches the filesystem for object files
containing code of some sort. This function is required by several
opcodes/components and is designed with parrot's unusual flexiblity in
mind.

object_name : the name of the object to load. It can be simply a name,
              path, or absolute path.

hll         : The hll argument is the key to the HLL name-space. The
              default namespace is used if the namespace does not exist
              or is null.

              The default name-space is "parrot".

loader      : a bit-mask selecting loaders to be included in the search.
              This arguement is passed by reference. When a matching file
              is found the value of loader is reset to the flag for the
              loader under which it was found.

              loader is not modified unless a matching file is found.

trace       : given non-null string a trace of the search will be appended.
              This allows higher level API's to capture detailed diagnostics
              when the search fails. This is useful for all parties.

The return value is:

  *  a string containing the path to the objects preferred form, or NULL if not found.

  * localized with parrot_platform_path_localize

      *  all path seperators are coverted to the platform's value (ie Win32)

      *  a hidden 0 char is appended making it suitable for direct use in C API calls,
         an artifact of the previous implementation.

Search behavior:

code objects can have several formats, some of which will be stored on
disk. This routine searches for the preferred format of an object.
Preferred is either lowest form (compiled), or highest form (source).

The given search rules apply at each step in the search order. The
search order is executed per loader. The loader mask is also traversed
in preferred the order.

rule 1: parrot will always try the object_name as given before adding
        extensions. The set of extensions tried is loader specific.

Search order:

     1. Absolute paths (abort on fail)

     2. Search the paths joined with get_runtime_prefix

     3. joined with get_runtime_prefix

     4. as given

     Exception 1. absolute paths in the path list are not prefixed

     Exception 2. If the loader mask is zero: step two of the search order
                  will be skipped. extensions are skipped since there
                  is no loader information available.

The two modals for the search are the runtime_prefix and preferred
order.

SEE ALSO: Parrot_get_runtime_prefix, query_load_prefer , F<include/parrot/library.h>

current parrot behavior can be achieved by passing NULL as the hll
argument, and a loader mask of:

  PARROT_RUNTIME_FT_BYTECODE &
  PARROT_RUNTIME_FT_INCLUDE &
  PARROT_RUNTIME_FT_SOURCE

for parrot bytecode, and PARROT_RUNTIME_FT_ARCH for platform shared
objects.

Implementation Notes:

The implementation of the search behavior is tagged in this file.
rules            :  grep search-rule x
search order     :  grep search-order x
search exception :  grep search-rule x

Is it theoretically possible that the tags could be overlayed onto a
call graph of this function and implementation can be verified to be
faithful to search behavior with static analysis ? Practically could
this be possible with execution paths restricted by coverage analysis ?

The search lists for both paths and extensions are dynamic arrays. In
the extension search space it is assumed that the lowest index
corresponds to the lowest form, and that the array is sorted
accordingly.

TODO: the extension , which is actually the stage of interpretation contained
      by the format is returned in the extension of the file. This should be
      returned as a optimization hint to heuristics that do the real
      determination of what's in a file , ( example: use v6; or shebang invocations )

TODO: instead of a string that is checked by stat() , a handle should be
      returned instead to close the classic access() race. Additional
      flags are needed for that such as NO_TTY and other basic cross-platform
      security open() masks. <-- huge warning.

      (This should be relative to a loader , higher level forms may
       have looser security constraints)

TODO: OS IO/VM hinting. some loaders could benefit from IO hinting such as
      mapped/streamed, use-once etc. depends on returning a handle and open
      flags.

=cut

*/

/* compute a bit-flag from a index of the loader bits. Need to check endian issues */

static int
ft_index_to_mask (int index ) {
    return 1 << index;
}

/* we have a multi-value return. Ensure that code does not forget to
 * set one of the expected returns by encapsulating return value
 * construction in a macro */

#define return_if_found ( path , for_loader ) if( path ) { *loader = for_loader ; return path }

STRING*
Parrot_locate_runtime_file_str(Interp *interp,
                               STRING *object_name,
                               STRING *hll,
                               enum_runtime_ft *loader,
                               STRING* trace)
{
    STRING *prefix, *file;
    int l_idx, l_bound;

    /* set the static trace variable, NULL to disable or a string for the
       diagnostics data. */
    SET_TRACE( trace );
    SEARCH_TRACE_PSTRING( "looking up path for object: %s\n" , object_name );

    /* get the value of PARROT_RUNTIME if any, a constant for the paths below */
    prefix = Parrot_get_runtime_prefix(interp);
    if( TRACE_ENABLED
        && prefix
        && string_length(interp, prefix)) append_trace(interp, "prefix is set as %s\n",prefix);

    if ( !*loader ) {
        /* search-exception 2
         *
         * If it is an absolute path try it now. If it fails skip the rest
         * of the search for this loader */

        SEARCH_TRACE_CSTRING( "all loaders disabled, path search disabled\n" );

        file = fs_lookup_with_prefix(interp,
                                     object_name,
                                     ( !parrot_path_is_abs(interp, object_name) ? prefix : NULL  ));

        return_if_found( file , 0 );
        return NULL;
    }

    l_bound = PARROT_RUNTIME_FT_SIZE;
    bound_by_load_prefer(&l_idx, &l_bound);

    while( l_idx != l_bound ) {
        PMC *object_search_space, *path_search_space;
        int i, n;

        /* ingore loaders not in the mask */
        if (0 == (ft_index_to_mask(l_idx) & *loader)) {

            l_idx = next_by_load_prefer(l_idx);
            continue;
        }

        SEARCH_TRACE_CSTRING( "looking for objects loadable by %s\n", loader_names[l_idx] );

        object_search_space  = path_concat_permutations(interp,
                                                        object_name,
                                                        get_search_space(interp,
                                                                         hll,
                                                                         *loader ,
                                                                         SEARCH_TABLE_EXT ));
        if ( TRACE_ENABLED ) {
            n = VTABLE_elements(interp, object_search_space);
            for( i = 0 ; i < n ; i++ )
                SEARCH_TRACE_PSTRING( "object  %s,\n" ,
                                      VTABLE_get_string_keyed_int(interp,
                                                                  object_search_space, i ));
        }

        /* If it is an absolute path try it now. If it fails skip the rest
         of the search for this loader */

        if (parrot_path_is_abs(interp, object_name)) {
          no_paths_available:
            /* search-order 3 , search-order 4 */


            file = fs_search_with_prefix(interp, object_search_space, NULL);
            return_if_found( file , ft_index_to_mask(l_idx) );

            l_idx = next_by_load_prefer(l_idx);
            continue;
        }

        /* we should always get a path search space unless the "parrot" search
         * space is broken in the tree. An extension table is not required. */

        path_search_space = get_search_space(interp, hll, *loader , SEARCH_TABLE_PATH );
        if ( !path_search_space ) {
            Parrot_warn(interp,PARROT_WARNINGS_UNDEF_FLAG,
                        "the parrot search paths are undefined including those built-in to parrot !\n");
            goto no_paths_available;
        }

        /* now iterate through the paths,
         * incorperating the value of PARROT_RUNTIME as well */

        n = VTABLE_elements(interp, path_search_space);

        for( i = 0; i < n ; i++ ) {
            STRING *path = VTABLE_get_string_keyed_int(interp, path_search_space, i);
            SEARCH_TRACE_PSTRING( "looking in path: %s\n" , path );

            if ( prefix
                 && string_length(interp, prefix)
                 && !parrot_path_is_abs(interp,path)) {
                /* search-exception 1 */

                path = parrot_path_concat(interp, prefix , path);
            }

            file = fs_search_with_prefix(interp, object_search_space, path);
            return_if_found( file , ft_index_to_mask(l_idx) );
            /* search-order 2 */
        }

        l_idx = next_by_load_prefer(l_idx);
    }

    return NULL;
}

static STRING*
query_runtime_prefix ( Interp* interp ) {

    STRING* prefix;

    int free_it;
    char *env;

    env = Parrot_getenv("PARROT_RUNTIME", &free_it);

    if (env) {
        prefix = string_from_cstring(interp, env, 0);
        if (free_it)
            mem_sys_free(env);

        return prefix;
    }

    return NULL;
}

/*

=item C<STRING* Parrot_get_runtime_prefix(Interp * )>

return the runtime prefix in the PMC string C<prefix>. The
config hash is used first if given, then the value of the
environment variable PARROT_RUNTIME. If neither are found
NULL is returned.
=cut

*/

STRING*
Parrot_get_runtime_prefix (Interp *interp ) {

    PMC *config_hash;

    STRING *key, *can_fail; /* can_fail , for storing string pointers from
                               functions that may fail to return a prefix value
                      */

    /* first look in the config hash for a user specified path */

    config_hash = VTABLE_get_pmc_keyed_int(interp, interp->iglobals,
            (INTVAL) IGLOBALS_CONFIG_HASH);

    if (VTABLE_elements(interp, config_hash)) {
        key = CONST_STRING(interp, "prefix");
        can_fail = VTABLE_get_string_keyed_str(interp, config_hash, key);

        if ( can_fail ) {
            /*
              TODO:
              shouldn't we do some sanity here ?  , assuming this can be
              set by random code/input we should see if it even exists.
            */

            return can_fail;
        }
    }

    /*
      fallback:

      no value was found in the config hash so try a system query, if
      that fails as well return the default.
    */

    return query_runtime_prefix(interp);
}

/*

=back

=head1 SEE ALSO

F<include/parrot/library.h>

=cut

*/


/*
 * Local variables:
 *   c-file-style: "parrot"
 * End:
 * vim: expandtab shiftwidth=4:
 */

Attachment: signature.asc
Description: PGP signature

Reply via email to