[Libosinfo] [osinfo-db-tools PATCH v4 2/3] import: Learn how to deal with URLs

Thu Dec 13 17:38:36 UTC 2018

On Fri, Dec 07, 2018 at 02:33:47PM +0100, Fabiano Fidêncio wrote:
> Signed-off-by: Fabiano Fidêncio <fidencio at redhat.com>
> ---
>  tools/osinfo-db-import.c | 90 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 86 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/osinfo-db-import.c b/tools/osinfo-db-import.c
> index 72f21ba..e4b7824 100644
> --- a/tools/osinfo-db-import.c
> +++ b/tools/osinfo-db-import.c
> @@ -134,6 +134,55 @@ static GFile *osinfo_db_import_get_file(GFile *target,
>      return g_file_resolve_relative_path(target, tmp);
>  }
>  
> +static int
> +osinfo_db_import_download_file(const gchar *url,
> +                               gchar **source_file)
> +{
> +    GFile *in = NULL;
> +    GFile *out = NULL;
> +    GError *err = NULL;
> +    gchar *filename = NULL;
> +    GFileCopyFlags flags = G_FILE_COPY_OVERWRITE | G_FILE_COPY_BACKUP;
> +    int ret = -1;
> +
> +    in = g_file_new_for_uri(url);
> +    if (in == NULL)
> +        return -1;
> +
> +    filename = g_file_get_basename(in);
> +    if (filename == NULL)
> +        goto cleanup;
> +
> +    *source_file = g_strdup_printf("%s/%s", g_get_user_cache_dir(), filename);
> +    if (*source_file == NULL)
> +        goto cleanup;
> +
> +    out = g_file_new_for_path(*source_file);
> +    if (out == NULL)
> +        goto cleanup;
> +
> +    if (!g_file_copy(in, out, flags, NULL, NULL, NULL, &err)) {
> +        g_printerr("Could not download file \"%s\": %s\n",
> +                   url, err->message);
> +        goto cleanup;
> +    }
> +
> +    ret = 0;
> +
> + cleanup:
> +    g_free(filename);
> +    if (in != NULL)
> +        g_object_unref(in);
> +    if (out != NULL)
> +        g_object_unref(out);
> +    if (err != NULL)
> +        g_error_free(err);
> +    if (ret != 0)
> +        unlink(*source_file);
> +
> +    return ret;
> +}
> +
>  static int osinfo_db_import_extract(GFile *target,
>                                      const char *source,
>                                      gboolean verbose)
> @@ -143,6 +192,8 @@ static int osinfo_db_import_extract(GFile *target,
>      int ret = -1;
>      int r;
>      GFile *file = NULL;
> +    gchar *source_file = NULL;
> +    const gchar *uri_schemes[] = {"ftp", "http", NULL};
>  
>      arc = archive_read_new();
>  
> @@ -152,9 +203,38 @@ static int osinfo_db_import_extract(GFile *target,
>      if (source != NULL && g_str_equal(source, "-"))
>          source = NULL;
>  
> -    if ((r = archive_read_open_filename(arc, source, 10240)) != ARCHIVE_OK) {
> +    if (source != NULL) {
> +        gboolean download_file = FALSE;
> +        gsize i;
> +
> +        file = g_file_new_for_uri(source);
> +        if (file == NULL)
> +            goto cleanup;
> +
> +        /* The supported uri schemes here are "ftp", "http" and "https".
> +         * However, "https" is not set as part of the array as it matches with
> +         * "http". */
> +        for (i = 0; uri_schemes[i] != NULL; i++) {
> +            if (g_file_has_uri_scheme(file, uri_schemes[i])) {
> +                download_file = TRUE;
> +                break;
> +            }

Rather than checking for URI schemes, can we just call
g_file_is_native()

Also 'file' is no longer required but we've not unrefd it.

> +        }

> +
> +        if (download_file) {
> +            if (osinfo_db_import_download_file(source, &source_file) < 0)

This method creates a GFile for 'source' again which is a bit
of a waste of time given we already have a GFile just above
that we could pass in

> +                goto cleanup;
> +        } else {
> +            source_file = g_strdup(source);
> +        }
> +
> +        if (source_file == NULL)
> +            goto cleanup;
> +    }
> +
> +    if ((r = archive_read_open_filename(arc, source_file, 10240)) != ARCHIVE_OK) {
>          g_printerr("%s: cannot open archive %s: %s\n",
> -                   argv0, source, archive_error_string(arc));
> +                   argv0, source_file, archive_error_string(arc));
>          goto cleanup;
>      }
>  
> @@ -164,7 +244,7 @@ static int osinfo_db_import_extract(GFile *target,
>              break;
>          if (r != ARCHIVE_OK) {
>              g_printerr("%s: cannot read next archive entry in %s: %s\n",
> -                       argv0, source, archive_error_string(arc));
> +                       argv0, source_file, archive_error_string(arc));
>              goto cleanup;
>          }
>  
> @@ -178,7 +258,7 @@ static int osinfo_db_import_extract(GFile *target,
>  
>      if (archive_read_close(arc) != ARCHIVE_OK) {
>          g_printerr("%s: cannot finish reading archive %s: %s\n",
> -                   argv0, source, archive_error_string(arc));
> +                   argv0, source_file, archive_error_string(arc));
>          goto cleanup;
>      }
>  
> @@ -187,6 +267,8 @@ static int osinfo_db_import_extract(GFile *target,
>      archive_read_free(arc);
>      if (file)
>          g_object_unref(file);
> +    unlink(source_file);

This deletes the user's input file which is not nice. We must only
delete the file if it was a temp file we created.

Can we in fact call unlink immediately after archive_read_open_file()
and rely on UNIX open FD semantics.

> +    g_free(source_file);
>      return ret;
>  }

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|